btrfs-progs: check: Move count_csum_range function to check/common.c
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
49
/*
 * Stage shown by the progress indicator.
 *
 * print_status_check() uses the value directly as an index into its
 * task_position_string[] table, so the enumerator order must stay in sync
 * with that table.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        /*
         * Must remain the last element: print_status_check() returns early
         * for it and the string table has no entry for it.
         */
        TASK_NOTHING, /* have to be the last element */
};
56
/* Context that print_status_check() receives through its void pointer. */
struct task_ctx {
        int progress_enabled;
        /* Current stage; selects the message printed by print_status_check() */
        enum task_position tp;

        /* Handle passed to task_period_start() and task_period_wait() */
        struct task_info *info;
};
63
/*
 * File-scope state of the checker.  None of these are static, so they have
 * external linkage - presumably so the other check/ sources can reach them;
 * TODO confirm against the check/ headers.
 */
u64 bytes_used = 0;
u64 total_csum_bytes = 0;
u64 total_btree_bytes = 0;
u64 total_fs_tree_bytes = 0;
u64 total_extent_tree_bytes = 0;
u64 btree_space_waste = 0;
u64 data_bytes_allocated = 0;
u64 data_bytes_referenced = 0;
LIST_HEAD(duplicate_extents);
LIST_HEAD(delete_items);
/* Non-zero stops maybe_free_inode_rec() from flagging file extent gaps */
int no_holes = 0;
int init_extent_tree = 0;
int check_data_csum = 0;
struct btrfs_fs_info *global_info;
/* Progress indicator context, starts out zeroed */
struct task_ctx ctx = { 0 };
struct cache_tree *roots_info_cache = NULL;
80
/*
 * Check implementation to run.  parse_check_mode() maps "orig"/"original"
 * and "lowmem" to the first two values and anything else to
 * CHECK_MODE_UNKNOWN.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        /* Alias, not a separate mode */
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};
87
/* Currently selected check mode; starts out as CHECK_MODE_DEFAULT. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Finish the status line: emit a newline on stdout and flush it.
 * @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
        fputs("\n", stdout);
        fflush(stdout);
        return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
/* Forward declaration; the definition is not in this part of the file. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
416
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418                                  struct btrfs_root *root)
419 {
420         if (root->last_trans != trans->transid) {
421                 root->track_dirty = 1;
422                 root->last_trans = trans->transid;
423                 root->commit_root = root->node;
424                 extent_buffer_get(root->node);
425         }
426 }
427
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
429 {
430         struct device_record *rec1;
431         struct device_record *rec2;
432
433         rec1 = rb_entry(node1, struct device_record, node);
434         rec2 = rb_entry(node2, struct device_record, node);
435         if (rec1->devid > rec2->devid)
436                 return -1;
437         else if (rec1->devid < rec2->devid)
438                 return 1;
439         else
440                 return 0;
441 }
442
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
444 {
445         struct inode_record *rec;
446         struct inode_backref *backref;
447         struct inode_backref *orig;
448         struct inode_backref *tmp;
449         struct orphan_data_extent *src_orphan;
450         struct orphan_data_extent *dst_orphan;
451         struct rb_node *rb;
452         size_t size;
453         int ret;
454
455         rec = malloc(sizeof(*rec));
456         if (!rec)
457                 return ERR_PTR(-ENOMEM);
458         memcpy(rec, orig_rec, sizeof(*rec));
459         rec->refs = 1;
460         INIT_LIST_HEAD(&rec->backrefs);
461         INIT_LIST_HEAD(&rec->orphan_extents);
462         rec->holes = RB_ROOT;
463
464         list_for_each_entry(orig, &orig_rec->backrefs, list) {
465                 size = sizeof(*orig) + orig->namelen + 1;
466                 backref = malloc(size);
467                 if (!backref) {
468                         ret = -ENOMEM;
469                         goto cleanup;
470                 }
471                 memcpy(backref, orig, size);
472                 list_add_tail(&backref->list, &rec->backrefs);
473         }
474         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475                 dst_orphan = malloc(sizeof(*dst_orphan));
476                 if (!dst_orphan) {
477                         ret = -ENOMEM;
478                         goto cleanup;
479                 }
480                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
482         }
483         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
484         if (ret < 0)
485                 goto cleanup_rb;
486
487         return rec;
488
489 cleanup_rb:
490         rb = rb_first(&rec->holes);
491         while (rb) {
492                 struct file_extent_hole *hole;
493
494                 hole = rb_entry(rb, struct file_extent_hole, node);
495                 rb = rb_next(rb);
496                 free(hole);
497         }
498
499 cleanup:
500         if (!list_empty(&rec->backrefs))
501                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502                         list_del(&orig->list);
503                         free(orig);
504                 }
505
506         if (!list_empty(&rec->orphan_extents))
507                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508                         list_del(&orig->list);
509                         free(orig);
510                 }
511
512         free(rec);
513
514         return ERR_PTR(ret);
515 }
516
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
518                                       u64 objectid)
519 {
520         struct orphan_data_extent *orphan;
521
522         if (list_empty(orphan_extents))
523                 return;
524         printf("The following data extent is lost in tree %llu:\n",
525                objectid);
526         list_for_each_entry(orphan, orphan_extents, list) {
527                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
529                        orphan->disk_len);
530         }
531 }
532
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
534 {
535         u64 root_objectid = root->root_key.objectid;
536         int errors = rec->errors;
537
538         if (!errors)
539                 return;
540         /* reloc root errors, we print its corresponding fs root objectid*/
541         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542                 root_objectid = root->root_key.offset;
543                 fprintf(stderr, "reloc");
544         }
545         fprintf(stderr, "root %llu inode %llu errors %x",
546                 (unsigned long long) root_objectid,
547                 (unsigned long long) rec->ino, rec->errors);
548
549         if (errors & I_ERR_NO_INODE_ITEM)
550                 fprintf(stderr, ", no inode item");
551         if (errors & I_ERR_NO_ORPHAN_ITEM)
552                 fprintf(stderr, ", no orphan item");
553         if (errors & I_ERR_DUP_INODE_ITEM)
554                 fprintf(stderr, ", dup inode item");
555         if (errors & I_ERR_DUP_DIR_INDEX)
556                 fprintf(stderr, ", dup dir index");
557         if (errors & I_ERR_ODD_DIR_ITEM)
558                 fprintf(stderr, ", odd dir item");
559         if (errors & I_ERR_ODD_FILE_EXTENT)
560                 fprintf(stderr, ", odd file extent");
561         if (errors & I_ERR_BAD_FILE_EXTENT)
562                 fprintf(stderr, ", bad file extent");
563         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564                 fprintf(stderr, ", file extent overlap");
565         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566                 fprintf(stderr, ", file extent discount");
567         if (errors & I_ERR_DIR_ISIZE_WRONG)
568                 fprintf(stderr, ", dir isize wrong");
569         if (errors & I_ERR_FILE_NBYTES_WRONG)
570                 fprintf(stderr, ", nbytes wrong");
571         if (errors & I_ERR_ODD_CSUM_ITEM)
572                 fprintf(stderr, ", odd csum item");
573         if (errors & I_ERR_SOME_CSUM_MISSING)
574                 fprintf(stderr, ", some csum missing");
575         if (errors & I_ERR_LINK_COUNT_WRONG)
576                 fprintf(stderr, ", link count wrong");
577         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578                 fprintf(stderr, ", orphan file extent");
579         fprintf(stderr, "\n");
580         /* Print the orphan extents if needed */
581         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
583
584         /* Print the holes if needed */
585         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586                 struct file_extent_hole *hole;
587                 struct rb_node *node;
588                 int found = 0;
589
590                 node = rb_first(&rec->holes);
591                 fprintf(stderr, "Found file extent holes:\n");
592                 while (node) {
593                         found = 1;
594                         hole = rb_entry(node, struct file_extent_hole, node);
595                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
596                                 hole->start, hole->len);
597                         node = rb_next(node);
598                 }
599                 if (!found)
600                         fprintf(stderr, "\tstart: 0, len: %llu\n",
601                                 round_up(rec->isize,
602                                          root->fs_info->sectorsize));
603         }
604 }
605
606 static void print_ref_error(int errors)
607 {
608         if (errors & REF_ERR_NO_DIR_ITEM)
609                 fprintf(stderr, ", no dir item");
610         if (errors & REF_ERR_NO_DIR_INDEX)
611                 fprintf(stderr, ", no dir index");
612         if (errors & REF_ERR_NO_INODE_REF)
613                 fprintf(stderr, ", no inode ref");
614         if (errors & REF_ERR_DUP_DIR_ITEM)
615                 fprintf(stderr, ", dup dir item");
616         if (errors & REF_ERR_DUP_DIR_INDEX)
617                 fprintf(stderr, ", dup dir index");
618         if (errors & REF_ERR_DUP_INODE_REF)
619                 fprintf(stderr, ", dup inode ref");
620         if (errors & REF_ERR_INDEX_UNMATCH)
621                 fprintf(stderr, ", index mismatch");
622         if (errors & REF_ERR_FILETYPE_UNMATCH)
623                 fprintf(stderr, ", filetype mismatch");
624         if (errors & REF_ERR_NAME_TOO_LONG)
625                 fprintf(stderr, ", name too long");
626         if (errors & REF_ERR_NO_ROOT_REF)
627                 fprintf(stderr, ", no root ref");
628         if (errors & REF_ERR_NO_ROOT_BACKREF)
629                 fprintf(stderr, ", no root backref");
630         if (errors & REF_ERR_DUP_ROOT_REF)
631                 fprintf(stderr, ", dup root ref");
632         if (errors & REF_ERR_DUP_ROOT_BACKREF)
633                 fprintf(stderr, ", dup root backref");
634         fprintf(stderr, "\n");
635 }
636
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
638                                           u64 ino, int mod)
639 {
640         struct ptr_node *node;
641         struct cache_extent *cache;
642         struct inode_record *rec = NULL;
643         int ret;
644
645         cache = lookup_cache_extent(inode_cache, ino, 1);
646         if (cache) {
647                 node = container_of(cache, struct ptr_node, cache);
648                 rec = node->data;
649                 if (mod && rec->refs > 1) {
650                         node->data = clone_inode_rec(rec);
651                         if (IS_ERR(node->data))
652                                 return node->data;
653                         rec->refs--;
654                         rec = node->data;
655                 }
656         } else if (mod) {
657                 rec = calloc(1, sizeof(*rec));
658                 if (!rec)
659                         return ERR_PTR(-ENOMEM);
660                 rec->ino = ino;
661                 rec->extent_start = (u64)-1;
662                 rec->refs = 1;
663                 INIT_LIST_HEAD(&rec->backrefs);
664                 INIT_LIST_HEAD(&rec->orphan_extents);
665                 rec->holes = RB_ROOT;
666
667                 node = malloc(sizeof(*node));
668                 if (!node) {
669                         free(rec);
670                         return ERR_PTR(-ENOMEM);
671                 }
672                 node->cache.start = ino;
673                 node->cache.size = 1;
674                 node->data = rec;
675
676                 if (ino == BTRFS_FREE_INO_OBJECTID)
677                         rec->found_link = 1;
678
679                 ret = insert_cache_extent(inode_cache, &node->cache);
680                 if (ret)
681                         return ERR_PTR(-EEXIST);
682         }
683         return rec;
684 }
685
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
687 {
688         struct orphan_data_extent *orphan;
689
690         while (!list_empty(orphan_extents)) {
691                 orphan = list_entry(orphan_extents->next,
692                                     struct orphan_data_extent, list);
693                 list_del(&orphan->list);
694                 free(orphan);
695         }
696 }
697
698 static void free_inode_rec(struct inode_record *rec)
699 {
700         struct inode_backref *backref;
701
702         if (--rec->refs > 0)
703                 return;
704
705         while (!list_empty(&rec->backrefs)) {
706                 backref = to_inode_backref(rec->backrefs.next);
707                 list_del(&backref->list);
708                 free(backref);
709         }
710         free_orphan_data_extents(&rec->orphan_extents);
711         free_file_extent_holes(&rec->holes);
712         free(rec);
713 }
714
715 static int can_free_inode_rec(struct inode_record *rec)
716 {
717         if (!rec->errors && rec->checked && rec->found_inode_item &&
718             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
719                 return 1;
720         return 0;
721 }
722
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724                                  struct inode_record *rec)
725 {
726         struct cache_extent *cache;
727         struct inode_backref *tmp, *backref;
728         struct ptr_node *node;
729         u8 filetype;
730
731         if (!rec->found_inode_item)
732                 return;
733
734         filetype = imode_to_type(rec->imode);
735         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736                 if (backref->found_dir_item && backref->found_dir_index) {
737                         if (backref->filetype != filetype)
738                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739                         if (!backref->errors && backref->found_inode_ref &&
740                             rec->nlink == rec->found_link) {
741                                 list_del(&backref->list);
742                                 free(backref);
743                         }
744                 }
745         }
746
747         if (!rec->checked || rec->merging)
748                 return;
749
750         if (S_ISDIR(rec->imode)) {
751                 if (rec->found_size != rec->isize)
752                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753                 if (rec->found_file_extent)
754                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
755         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756                 if (rec->found_dir_item)
757                         rec->errors |= I_ERR_ODD_DIR_ITEM;
758                 if (rec->found_size != rec->nbytes)
759                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
760                 if (rec->nlink > 0 && !no_holes &&
761                     (rec->extent_end < rec->isize ||
762                      first_extent_gap(&rec->holes) < rec->isize))
763                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
764         }
765
766         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767                 if (rec->found_csum_item && rec->nodatasum)
768                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
769                 if (rec->some_csum_missing && !rec->nodatasum)
770                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
771         }
772
773         BUG_ON(rec->refs != 1);
774         if (can_free_inode_rec(rec)) {
775                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776                 node = container_of(cache, struct ptr_node, cache);
777                 BUG_ON(node->data != rec);
778                 remove_cache_extent(inode_cache, &node->cache);
779                 free(node);
780                 free_inode_rec(rec);
781         }
782 }
783
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
785 {
786         struct btrfs_path path;
787         struct btrfs_key key;
788         int ret;
789
790         key.objectid = BTRFS_ORPHAN_OBJECTID;
791         key.type = BTRFS_ORPHAN_ITEM_KEY;
792         key.offset = ino;
793
794         btrfs_init_path(&path);
795         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796         btrfs_release_path(&path);
797         if (ret > 0)
798                 ret = -ENOENT;
799         return ret;
800 }
801
802 static int process_inode_item(struct extent_buffer *eb,
803                               int slot, struct btrfs_key *key,
804                               struct shared_node *active_node)
805 {
806         struct inode_record *rec;
807         struct btrfs_inode_item *item;
808
809         rec = active_node->current;
810         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811         if (rec->found_inode_item) {
812                 rec->errors |= I_ERR_DUP_INODE_ITEM;
813                 return 1;
814         }
815         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816         rec->nlink = btrfs_inode_nlink(eb, item);
817         rec->isize = btrfs_inode_size(eb, item);
818         rec->nbytes = btrfs_inode_nbytes(eb, item);
819         rec->imode = btrfs_inode_mode(eb, item);
820         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
821                 rec->nodatasum = 1;
822         rec->found_inode_item = 1;
823         if (rec->nlink == 0)
824                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825         maybe_free_inode_rec(&active_node->inode_cache, rec);
826         return 0;
827 }
828
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
830                                                 const char *name,
831                                                 int namelen, u64 dir)
832 {
833         struct inode_backref *backref;
834
835         list_for_each_entry(backref, &rec->backrefs, list) {
836                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
837                         break;
838                 if (backref->dir != dir || backref->namelen != namelen)
839                         continue;
840                 if (memcmp(name, backref->name, namelen))
841                         continue;
842                 return backref;
843         }
844
845         backref = malloc(sizeof(*backref) + namelen + 1);
846         if (!backref)
847                 return NULL;
848         memset(backref, 0, sizeof(*backref));
849         backref->dir = dir;
850         backref->namelen = namelen;
851         memcpy(backref->name, name, namelen);
852         backref->name[namelen] = '\0';
853         list_add_tail(&backref->list, &rec->backrefs);
854         return backref;
855 }
856
857 static int add_inode_backref(struct cache_tree *inode_cache,
858                              u64 ino, u64 dir, u64 index,
859                              const char *name, int namelen,
860                              u8 filetype, u8 itemtype, int errors)
861 {
862         struct inode_record *rec;
863         struct inode_backref *backref;
864
865         rec = get_inode_rec(inode_cache, ino, 1);
866         BUG_ON(IS_ERR(rec));
867         backref = get_inode_backref(rec, name, namelen, dir);
868         BUG_ON(!backref);
869         if (errors)
870                 backref->errors |= errors;
871         if (itemtype == BTRFS_DIR_INDEX_KEY) {
872                 if (backref->found_dir_index)
873                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
874                 if (backref->found_inode_ref && backref->index != index)
875                         backref->errors |= REF_ERR_INDEX_UNMATCH;
876                 if (backref->found_dir_item && backref->filetype != filetype)
877                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
878
879                 backref->index = index;
880                 backref->filetype = filetype;
881                 backref->found_dir_index = 1;
882         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
883                 rec->found_link++;
884                 if (backref->found_dir_item)
885                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
886                 if (backref->found_dir_index && backref->filetype != filetype)
887                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
888
889                 backref->filetype = filetype;
890                 backref->found_dir_item = 1;
891         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893                 if (backref->found_inode_ref)
894                         backref->errors |= REF_ERR_DUP_INODE_REF;
895                 if (backref->found_dir_index && backref->index != index)
896                         backref->errors |= REF_ERR_INDEX_UNMATCH;
897                 else
898                         backref->index = index;
899
900                 backref->ref_type = itemtype;
901                 backref->found_inode_ref = 1;
902         } else {
903                 BUG_ON(1);
904         }
905
906         maybe_free_inode_rec(inode_cache, rec);
907         return 0;
908 }
909
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911                             struct cache_tree *dst_cache)
912 {
913         struct inode_backref *backref;
914         u32 dir_count = 0;
915         int ret = 0;
916
917         dst->merging = 1;
918         list_for_each_entry(backref, &src->backrefs, list) {
919                 if (backref->found_dir_index) {
920                         add_inode_backref(dst_cache, dst->ino, backref->dir,
921                                         backref->index, backref->name,
922                                         backref->namelen, backref->filetype,
923                                         BTRFS_DIR_INDEX_KEY, backref->errors);
924                 }
925                 if (backref->found_dir_item) {
926                         dir_count++;
927                         add_inode_backref(dst_cache, dst->ino,
928                                         backref->dir, 0, backref->name,
929                                         backref->namelen, backref->filetype,
930                                         BTRFS_DIR_ITEM_KEY, backref->errors);
931                 }
932                 if (backref->found_inode_ref) {
933                         add_inode_backref(dst_cache, dst->ino,
934                                         backref->dir, backref->index,
935                                         backref->name, backref->namelen, 0,
936                                         backref->ref_type, backref->errors);
937                 }
938         }
939
940         if (src->found_dir_item)
941                 dst->found_dir_item = 1;
942         if (src->found_file_extent)
943                 dst->found_file_extent = 1;
944         if (src->found_csum_item)
945                 dst->found_csum_item = 1;
946         if (src->some_csum_missing)
947                 dst->some_csum_missing = 1;
948         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
950                 if (ret < 0)
951                         return ret;
952         }
953
954         BUG_ON(src->found_link < dir_count);
955         dst->found_link += src->found_link - dir_count;
956         dst->found_size += src->found_size;
957         if (src->extent_start != (u64)-1) {
958                 if (dst->extent_start == (u64)-1) {
959                         dst->extent_start = src->extent_start;
960                         dst->extent_end = src->extent_end;
961                 } else {
962                         if (dst->extent_end > src->extent_start)
963                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964                         else if (dst->extent_end < src->extent_start) {
965                                 ret = add_file_extent_hole(&dst->holes,
966                                         dst->extent_end,
967                                         src->extent_start - dst->extent_end);
968                         }
969                         if (dst->extent_end < src->extent_end)
970                                 dst->extent_end = src->extent_end;
971                 }
972         }
973
974         dst->errors |= src->errors;
975         if (src->found_inode_item) {
976                 if (!dst->found_inode_item) {
977                         dst->nlink = src->nlink;
978                         dst->isize = src->isize;
979                         dst->nbytes = src->nbytes;
980                         dst->imode = src->imode;
981                         dst->nodatasum = src->nodatasum;
982                         dst->found_inode_item = 1;
983                 } else {
984                         dst->errors |= I_ERR_DUP_INODE_ITEM;
985                 }
986         }
987         dst->merging = 0;
988
989         return 0;
990 }
991
992 static int splice_shared_node(struct shared_node *src_node,
993                               struct shared_node *dst_node)
994 {
995         struct cache_extent *cache;
996         struct ptr_node *node, *ins;
997         struct cache_tree *src, *dst;
998         struct inode_record *rec, *conflict;
999         u64 current_ino = 0;
1000         int splice = 0;
1001         int ret;
1002
1003         if (--src_node->refs == 0)
1004                 splice = 1;
1005         if (src_node->current)
1006                 current_ino = src_node->current->ino;
1007
1008         src = &src_node->root_cache;
1009         dst = &dst_node->root_cache;
1010 again:
1011         cache = search_cache_extent(src, 0);
1012         while (cache) {
1013                 node = container_of(cache, struct ptr_node, cache);
1014                 rec = node->data;
1015                 cache = next_cache_extent(cache);
1016
1017                 if (splice) {
1018                         remove_cache_extent(src, &node->cache);
1019                         ins = node;
1020                 } else {
1021                         ins = malloc(sizeof(*ins));
1022                         BUG_ON(!ins);
1023                         ins->cache.start = node->cache.start;
1024                         ins->cache.size = node->cache.size;
1025                         ins->data = rec;
1026                         rec->refs++;
1027                 }
1028                 ret = insert_cache_extent(dst, &ins->cache);
1029                 if (ret == -EEXIST) {
1030                         conflict = get_inode_rec(dst, rec->ino, 1);
1031                         BUG_ON(IS_ERR(conflict));
1032                         merge_inode_recs(rec, conflict, dst);
1033                         if (rec->checked) {
1034                                 conflict->checked = 1;
1035                                 if (dst_node->current == conflict)
1036                                         dst_node->current = NULL;
1037                         }
1038                         maybe_free_inode_rec(dst, conflict);
1039                         free_inode_rec(rec);
1040                         free(ins);
1041                 } else {
1042                         BUG_ON(ret);
1043                 }
1044         }
1045
1046         if (src == &src_node->root_cache) {
1047                 src = &src_node->inode_cache;
1048                 dst = &dst_node->inode_cache;
1049                 goto again;
1050         }
1051
1052         if (current_ino > 0 && (!dst_node->current ||
1053             current_ino > dst_node->current->ino)) {
1054                 if (dst_node->current) {
1055                         dst_node->current->checked = 1;
1056                         maybe_free_inode_rec(dst, dst_node->current);
1057                 }
1058                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059                 BUG_ON(IS_ERR(dst_node->current));
1060         }
1061         return 0;
1062 }
1063
1064 static void free_inode_ptr(struct cache_extent *cache)
1065 {
1066         struct ptr_node *node;
1067         struct inode_record *rec;
1068
1069         node = container_of(cache, struct ptr_node, cache);
1070         rec = node->data;
1071         free_inode_rec(rec);
1072         free(node);
1073 }
1074
/*
 * NOTE(review): presumably this expands to the definition of
 * free_inode_recs_tree() (called from enter_shared_node()), using
 * free_inode_ptr() to release each entry -- confirm against the macro
 * definition.
 */
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1076
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1078                                             u64 bytenr)
1079 {
1080         struct cache_extent *cache;
1081         struct shared_node *node;
1082
1083         cache = lookup_cache_extent(shared, bytenr, 1);
1084         if (cache) {
1085                 node = container_of(cache, struct shared_node, cache);
1086                 return node;
1087         }
1088         return NULL;
1089 }
1090
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1092 {
1093         int ret;
1094         struct shared_node *node;
1095
1096         node = calloc(1, sizeof(*node));
1097         if (!node)
1098                 return -ENOMEM;
1099         node->cache.start = bytenr;
1100         node->cache.size = 1;
1101         cache_tree_init(&node->root_cache);
1102         cache_tree_init(&node->inode_cache);
1103         node->refs = refs;
1104
1105         ret = insert_cache_extent(shared, &node->cache);
1106
1107         return ret;
1108 }
1109
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111                              struct walk_control *wc, int level)
1112 {
1113         struct shared_node *node;
1114         struct shared_node *dest;
1115         int ret;
1116
1117         if (level == wc->active_node)
1118                 return 0;
1119
1120         BUG_ON(wc->active_node <= level);
1121         node = find_shared_node(&wc->shared, bytenr);
1122         if (!node) {
1123                 ret = add_shared_node(&wc->shared, bytenr, refs);
1124                 BUG_ON(ret);
1125                 node = find_shared_node(&wc->shared, bytenr);
1126                 wc->nodes[level] = node;
1127                 wc->active_node = level;
1128                 return 0;
1129         }
1130
1131         if (wc->root_level == wc->active_node &&
1132             btrfs_root_refs(&root->root_item) == 0) {
1133                 if (--node->refs == 0) {
1134                         free_inode_recs_tree(&node->root_cache);
1135                         free_inode_recs_tree(&node->inode_cache);
1136                         remove_cache_extent(&wc->shared, &node->cache);
1137                         free(node);
1138                 }
1139                 return 1;
1140         }
1141
1142         dest = wc->nodes[wc->active_node];
1143         splice_shared_node(node, dest);
1144         if (node->refs == 0) {
1145                 remove_cache_extent(&wc->shared, &node->cache);
1146                 free(node);
1147         }
1148         return 1;
1149 }
1150
1151 static int leave_shared_node(struct btrfs_root *root,
1152                              struct walk_control *wc, int level)
1153 {
1154         struct shared_node *node;
1155         struct shared_node *dest;
1156         int i;
1157
1158         if (level == wc->root_level)
1159                 return 0;
1160
1161         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1162                 if (wc->nodes[i])
1163                         break;
1164         }
1165         BUG_ON(i >= BTRFS_MAX_LEVEL);
1166
1167         node = wc->nodes[wc->active_node];
1168         wc->nodes[wc->active_node] = NULL;
1169         wc->active_node = i;
1170
1171         dest = wc->nodes[wc->active_node];
1172         if (wc->active_node < wc->root_level ||
1173             btrfs_root_refs(&root->root_item) > 0) {
1174                 BUG_ON(node->refs <= 1);
1175                 splice_shared_node(node, dest);
1176         } else {
1177                 BUG_ON(node->refs < 2);
1178                 node->refs--;
1179         }
1180         return 0;
1181 }
1182
1183 /*
1184  * Returns:
1185  * < 0 - on error
1186  * 1   - if the root with id child_root_id is a child of root parent_root_id
1187  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1188  *       has other root(s) as parent(s)
1189  * 2   - if the root child_root_id doesn't have any parent roots
1190  */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1192                          u64 child_root_id)
1193 {
1194         struct btrfs_path path;
1195         struct btrfs_key key;
1196         struct extent_buffer *leaf;
1197         int has_parent = 0;
1198         int ret;
1199
1200         btrfs_init_path(&path);
1201
1202         key.objectid = parent_root_id;
1203         key.type = BTRFS_ROOT_REF_KEY;
1204         key.offset = child_root_id;
1205         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1206                                 0, 0);
1207         if (ret < 0)
1208                 return ret;
1209         btrfs_release_path(&path);
1210         if (!ret)
1211                 return 1;
1212
1213         key.objectid = child_root_id;
1214         key.type = BTRFS_ROOT_BACKREF_KEY;
1215         key.offset = 0;
1216         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1217                                 0, 0);
1218         if (ret < 0)
1219                 goto out;
1220
1221         while (1) {
1222                 leaf = path.nodes[0];
1223                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1225                         if (ret)
1226                                 break;
1227                         leaf = path.nodes[0];
1228                 }
1229
1230                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231                 if (key.objectid != child_root_id ||
1232                     key.type != BTRFS_ROOT_BACKREF_KEY)
1233                         break;
1234
1235                 has_parent = 1;
1236
1237                 if (key.offset == parent_root_id) {
1238                         btrfs_release_path(&path);
1239                         return 1;
1240                 }
1241
1242                 path.slots[0]++;
1243         }
1244 out:
1245         btrfs_release_path(&path);
1246         if (ret < 0)
1247                 return ret;
1248         return has_parent ? 0 : 2;
1249 }
1250
1251 static int process_dir_item(struct extent_buffer *eb,
1252                             int slot, struct btrfs_key *key,
1253                             struct shared_node *active_node)
1254 {
1255         u32 total;
1256         u32 cur = 0;
1257         u32 len;
1258         u32 name_len;
1259         u32 data_len;
1260         int error;
1261         int nritems = 0;
1262         u8 filetype;
1263         struct btrfs_dir_item *di;
1264         struct inode_record *rec;
1265         struct cache_tree *root_cache;
1266         struct cache_tree *inode_cache;
1267         struct btrfs_key location;
1268         char namebuf[BTRFS_NAME_LEN];
1269
1270         root_cache = &active_node->root_cache;
1271         inode_cache = &active_node->inode_cache;
1272         rec = active_node->current;
1273         rec->found_dir_item = 1;
1274
1275         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276         total = btrfs_item_size_nr(eb, slot);
1277         while (cur < total) {
1278                 nritems++;
1279                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280                 name_len = btrfs_dir_name_len(eb, di);
1281                 data_len = btrfs_dir_data_len(eb, di);
1282                 filetype = btrfs_dir_type(eb, di);
1283
1284                 rec->found_size += name_len;
1285                 if (cur + sizeof(*di) + name_len > total ||
1286                     name_len > BTRFS_NAME_LEN) {
1287                         error = REF_ERR_NAME_TOO_LONG;
1288
1289                         if (cur + sizeof(*di) > total)
1290                                 break;
1291                         len = min_t(u32, total - cur - sizeof(*di),
1292                                     BTRFS_NAME_LEN);
1293                 } else {
1294                         len = name_len;
1295                         error = 0;
1296                 }
1297
1298                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1299
1300                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301                     key->offset != btrfs_name_hash(namebuf, len)) {
1302                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1303                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304                         key->objectid, key->offset, namebuf, len, filetype,
1305                         key->offset, btrfs_name_hash(namebuf, len));
1306                 }
1307
1308                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309                         add_inode_backref(inode_cache, location.objectid,
1310                                           key->objectid, key->offset, namebuf,
1311                                           len, filetype, key->type, error);
1312                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313                         add_inode_backref(root_cache, location.objectid,
1314                                           key->objectid, key->offset,
1315                                           namebuf, len, filetype,
1316                                           key->type, error);
1317                 } else {
1318                         fprintf(stderr,
1319                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320                                 location.type, key->objectid, key->offset);
1321                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322                                           key->objectid, key->offset, namebuf,
1323                                           len, filetype, key->type, error);
1324                 }
1325
1326                 len = sizeof(*di) + name_len + data_len;
1327                 di = (struct btrfs_dir_item *)((char *)di + len);
1328                 cur += len;
1329         }
1330         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1332
1333         return 0;
1334 }
1335
1336 static int process_inode_ref(struct extent_buffer *eb,
1337                              int slot, struct btrfs_key *key,
1338                              struct shared_node *active_node)
1339 {
1340         u32 total;
1341         u32 cur = 0;
1342         u32 len;
1343         u32 name_len;
1344         u64 index;
1345         int error;
1346         struct cache_tree *inode_cache;
1347         struct btrfs_inode_ref *ref;
1348         char namebuf[BTRFS_NAME_LEN];
1349
1350         inode_cache = &active_node->inode_cache;
1351
1352         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353         total = btrfs_item_size_nr(eb, slot);
1354         while (cur < total) {
1355                 name_len = btrfs_inode_ref_name_len(eb, ref);
1356                 index = btrfs_inode_ref_index(eb, ref);
1357
1358                 /* inode_ref + namelen should not cross item boundary */
1359                 if (cur + sizeof(*ref) + name_len > total ||
1360                     name_len > BTRFS_NAME_LEN) {
1361                         if (total < cur + sizeof(*ref))
1362                                 break;
1363
1364                         /* Still try to read out the remaining part */
1365                         len = min_t(u32, total - cur - sizeof(*ref),
1366                                     BTRFS_NAME_LEN);
1367                         error = REF_ERR_NAME_TOO_LONG;
1368                 } else {
1369                         len = name_len;
1370                         error = 0;
1371                 }
1372
1373                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374                 add_inode_backref(inode_cache, key->objectid, key->offset,
1375                                   index, namebuf, len, 0, key->type, error);
1376
1377                 len = sizeof(*ref) + name_len;
1378                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1379                 cur += len;
1380         }
1381         return 0;
1382 }
1383
1384 static int process_inode_extref(struct extent_buffer *eb,
1385                                 int slot, struct btrfs_key *key,
1386                                 struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u64 index;
1393         u64 parent;
1394         int error;
1395         struct cache_tree *inode_cache;
1396         struct btrfs_inode_extref *extref;
1397         char namebuf[BTRFS_NAME_LEN];
1398
1399         inode_cache = &active_node->inode_cache;
1400
1401         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402         total = btrfs_item_size_nr(eb, slot);
1403         while (cur < total) {
1404                 name_len = btrfs_inode_extref_name_len(eb, extref);
1405                 index = btrfs_inode_extref_index(eb, extref);
1406                 parent = btrfs_inode_extref_parent(eb, extref);
1407                 if (name_len <= BTRFS_NAME_LEN) {
1408                         len = name_len;
1409                         error = 0;
1410                 } else {
1411                         len = BTRFS_NAME_LEN;
1412                         error = REF_ERR_NAME_TOO_LONG;
1413                 }
1414                 read_extent_buffer(eb, namebuf,
1415                                    (unsigned long)(extref + 1), len);
1416                 add_inode_backref(inode_cache, key->objectid, parent,
1417                                   index, namebuf, len, 0, key->type, error);
1418
1419                 len = sizeof(*extref) + name_len;
1420                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1421                 cur += len;
1422         }
1423         return 0;
1424
1425 }
1426
1427 static int process_file_extent(struct btrfs_root *root,
1428                                 struct extent_buffer *eb,
1429                                 int slot, struct btrfs_key *key,
1430                                 struct shared_node *active_node)
1431 {
1432         struct inode_record *rec;
1433         struct btrfs_file_extent_item *fi;
1434         u64 num_bytes = 0;
1435         u64 disk_bytenr = 0;
1436         u64 extent_offset = 0;
1437         u64 mask = root->fs_info->sectorsize - 1;
1438         int extent_type;
1439         int ret;
1440
1441         rec = active_node->current;
1442         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1443         rec->found_file_extent = 1;
1444
1445         if (rec->extent_start == (u64)-1) {
1446                 rec->extent_start = key->offset;
1447                 rec->extent_end = key->offset;
1448         }
1449
1450         if (rec->extent_end > key->offset)
1451                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1452         else if (rec->extent_end < key->offset) {
1453                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1454                                            key->offset - rec->extent_end);
1455                 if (ret < 0)
1456                         return ret;
1457         }
1458
1459         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1460         extent_type = btrfs_file_extent_type(eb, fi);
1461
1462         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1463                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1464                 if (num_bytes == 0)
1465                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1466                 rec->found_size += num_bytes;
1467                 num_bytes = (num_bytes + mask) & ~mask;
1468         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1469                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1470                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1471                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1472                 extent_offset = btrfs_file_extent_offset(eb, fi);
1473                 if (num_bytes == 0 || (num_bytes & mask))
1474                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1475                 if (num_bytes + extent_offset >
1476                     btrfs_file_extent_ram_bytes(eb, fi))
1477                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1478                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1479                     (btrfs_file_extent_compression(eb, fi) ||
1480                      btrfs_file_extent_encryption(eb, fi) ||
1481                      btrfs_file_extent_other_encoding(eb, fi)))
1482                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1483                 if (disk_bytenr > 0)
1484                         rec->found_size += num_bytes;
1485         } else {
1486                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1487         }
1488         rec->extent_end = key->offset + num_bytes;
1489
1490         /*
1491          * The data reloc tree will copy full extents into its inode and then
1492          * copy the corresponding csums.  Because the extent it copied could be
1493          * a preallocated extent that hasn't been written to yet there may be no
1494          * csums to copy, ergo we won't have csums for our file extent.  This is
1495          * ok so just don't bother checking csums if the inode belongs to the
1496          * data reloc tree.
1497          */
1498         if (disk_bytenr > 0 &&
1499             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1500                 u64 found;
1501                 if (btrfs_file_extent_compression(eb, fi))
1502                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1503                 else
1504                         disk_bytenr += extent_offset;
1505
1506                 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1507                                        &found);
1508                 if (ret < 0)
1509                         return ret;
1510                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1511                         if (found > 0)
1512                                 rec->found_csum_item = 1;
1513                         if (found < num_bytes)
1514                                 rec->some_csum_missing = 1;
1515                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1516                         if (found > 0)
1517                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1518                 }
1519         }
1520         return 0;
1521 }
1522
1523 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1524                             struct walk_control *wc)
1525 {
1526         struct btrfs_key key;
1527         u32 nritems;
1528         int i;
1529         int ret = 0;
1530         struct cache_tree *inode_cache;
1531         struct shared_node *active_node;
1532
1533         if (wc->root_level == wc->active_node &&
1534             btrfs_root_refs(&root->root_item) == 0)
1535                 return 0;
1536
1537         active_node = wc->nodes[wc->active_node];
1538         inode_cache = &active_node->inode_cache;
1539         nritems = btrfs_header_nritems(eb);
1540         for (i = 0; i < nritems; i++) {
1541                 btrfs_item_key_to_cpu(eb, &key, i);
1542
1543                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1544                         continue;
1545                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1546                         continue;
1547
1548                 if (active_node->current == NULL ||
1549                     active_node->current->ino < key.objectid) {
1550                         if (active_node->current) {
1551                                 active_node->current->checked = 1;
1552                                 maybe_free_inode_rec(inode_cache,
1553                                                      active_node->current);
1554                         }
1555                         active_node->current = get_inode_rec(inode_cache,
1556                                                              key.objectid, 1);
1557                         BUG_ON(IS_ERR(active_node->current));
1558                 }
1559                 switch (key.type) {
1560                 case BTRFS_DIR_ITEM_KEY:
1561                 case BTRFS_DIR_INDEX_KEY:
1562                         ret = process_dir_item(eb, i, &key, active_node);
1563                         break;
1564                 case BTRFS_INODE_REF_KEY:
1565                         ret = process_inode_ref(eb, i, &key, active_node);
1566                         break;
1567                 case BTRFS_INODE_EXTREF_KEY:
1568                         ret = process_inode_extref(eb, i, &key, active_node);
1569                         break;
1570                 case BTRFS_INODE_ITEM_KEY:
1571                         ret = process_inode_item(eb, i, &key, active_node);
1572                         break;
1573                 case BTRFS_EXTENT_DATA_KEY:
1574                         ret = process_file_extent(root, eb, i, &key,
1575                                                   active_node);
1576                         break;
1577                 default:
1578                         break;
1579                 };
1580         }
1581         return ret;
1582 }
1583
/*
 * Forward declarations: both functions are called by process_one_leaf_v2()
 * below.
 */
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
                             struct extent_buffer *eb, struct node_refs *nrefs,
                             u64 level, int check_all);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref);
1589
1590 /*
1591  * Returns >0  Found error, not fatal, should continue
1592  * Returns <0  Fatal error, must exit the whole check
1593  * Returns 0   No errors found
1594  */
1595 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1596                                struct node_refs *nrefs, int *level, int ext_ref)
1597 {
1598         struct extent_buffer *cur = path->nodes[0];
1599         struct btrfs_key key;
1600         u64 cur_bytenr;
1601         u32 nritems;
1602         u64 first_ino = 0;
1603         int root_level = btrfs_header_level(root->node);
1604         int i;
1605         int ret = 0; /* Final return value */
1606         int err = 0; /* Positive error bitmap */
1607
1608         cur_bytenr = cur->start;
1609
1610         /* skip to first inode item or the first inode number change */
1611         nritems = btrfs_header_nritems(cur);
1612         for (i = 0; i < nritems; i++) {
1613                 btrfs_item_key_to_cpu(cur, &key, i);
1614                 if (i == 0)
1615                         first_ino = key.objectid;
1616                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1617                     (first_ino && first_ino != key.objectid))
1618                         break;
1619         }
1620         if (i == nritems) {
1621                 path->slots[0] = nritems;
1622                 return 0;
1623         }
1624         path->slots[0] = i;
1625
1626 again:
1627         err |= check_inode_item(root, path, ext_ref);
1628
1629         /* modify cur since check_inode_item may change path */
1630         cur = path->nodes[0];
1631
1632         if (err & LAST_ITEM)
1633                 goto out;
1634
1635         /* still have inode items in thie leaf */
1636         if (cur->start == cur_bytenr)
1637                 goto again;
1638
1639         /*
1640          * we have switched to another leaf, above nodes may
1641          * have changed, here walk down the path, if a node
1642          * or leaf is shared, check whether we can skip this
1643          * node or leaf.
1644          */
1645         for (i = root_level; i >= 0; i--) {
1646                 if (path->nodes[i]->start == nrefs->bytenr[i])
1647                         continue;
1648
1649                 ret = update_nodes_refs(root, path->nodes[i]->start,
1650                                 path->nodes[i], nrefs, i, 0);
1651                 if (ret)
1652                         goto out;
1653
1654                 if (!nrefs->need_check[i]) {
1655                         *level += 1;
1656                         break;
1657                 }
1658         }
1659
1660         for (i = 0; i < *level; i++) {
1661                 free_extent_buffer(path->nodes[i]);
1662                 path->nodes[i] = NULL;
1663         }
1664 out:
1665         err &= ~LAST_ITEM;
1666         if (err && !ret)
1667                 ret = err;
1668         return ret;
1669 }
1670
1671 static void reada_walk_down(struct btrfs_root *root,
1672                             struct extent_buffer *node, int slot)
1673 {
1674         struct btrfs_fs_info *fs_info = root->fs_info;
1675         u64 bytenr;
1676         u64 ptr_gen;
1677         u32 nritems;
1678         int i;
1679         int level;
1680
1681         level = btrfs_header_level(node);
1682         if (level != 1)
1683                 return;
1684
1685         nritems = btrfs_header_nritems(node);
1686         for (i = slot; i < nritems; i++) {
1687                 bytenr = btrfs_node_blockptr(node, i);
1688                 ptr_gen = btrfs_node_ptr_generation(node, i);
1689                 readahead_tree_block(fs_info, bytenr, ptr_gen);
1690         }
1691 }
1692
1693 /*
1694  * Check the child node/leaf by the following condition:
1695  * 1. the first item key of the node/leaf should be the same with the one
1696  *    in parent.
1697  * 2. block in parent node should match the child node/leaf.
1698  * 3. generation of parent node and child's header should be consistent.
1699  *
1700  * Or the child node/leaf pointed by the key in parent is not valid.
1701  *
1702  * We hope to check leaf owner too, but since subvol may share leaves,
1703  * which makes leaf owner check not so strong, key check should be
1704  * sufficient enough for that case.
1705  */
1706 static int check_child_node(struct extent_buffer *parent, int slot,
1707                             struct extent_buffer *child)
1708 {
1709         struct btrfs_key parent_key;
1710         struct btrfs_key child_key;
1711         int ret = 0;
1712
1713         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1714         if (btrfs_header_level(child) == 0)
1715                 btrfs_item_key_to_cpu(child, &child_key, 0);
1716         else
1717                 btrfs_node_key_to_cpu(child, &child_key, 0);
1718
1719         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1720                 ret = -EINVAL;
1721                 fprintf(stderr,
1722                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1723                         parent_key.objectid, parent_key.type, parent_key.offset,
1724                         child_key.objectid, child_key.type, child_key.offset);
1725         }
1726         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1727                 ret = -EINVAL;
1728                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1729                         btrfs_node_blockptr(parent, slot),
1730                         btrfs_header_bytenr(child));
1731         }
1732         if (btrfs_node_ptr_generation(parent, slot) !=
1733             btrfs_header_generation(child)) {
1734                 ret = -EINVAL;
1735                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1736                         btrfs_header_generation(child),
1737                         btrfs_node_ptr_generation(parent, slot));
1738         }
1739         return ret;
1740 }
1741
1742 /*
1743  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1744  * in every fs or file tree check. Here we find its all root ids, and only check
1745  * it in the fs or file tree which has the smallest root id.
1746  */
1747 static int need_check(struct btrfs_root *root, struct ulist *roots)
1748 {
1749         struct rb_node *node;
1750         struct ulist_node *u;
1751
1752         /*
1753          * @roots can be empty if it belongs to tree reloc tree
1754          * In that case, we should always check the leaf, as we can't use
1755          * the tree owner to ensure some other root will check it.
1756          */
1757         if (roots->nnodes == 1 || roots->nnodes == 0)
1758                 return 1;
1759
1760         node = rb_first(&roots->root);
1761         u = rb_entry(node, struct ulist_node, rb_node);
1762         /*
1763          * current root id is not smallest, we skip it and let it be checked
1764          * in the fs or file tree who hash the smallest root id.
1765          */
1766         if (root->objectid != u->val)
1767                 return 0;
1768
1769         return 1;
1770 }
1771
1772 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1773                                u64 *flags_ret)
1774 {
1775         struct btrfs_root *extent_root = root->fs_info->extent_root;
1776         struct btrfs_root_item *ri = &root->root_item;
1777         struct btrfs_extent_inline_ref *iref;
1778         struct btrfs_extent_item *ei;
1779         struct btrfs_key key;
1780         struct btrfs_path *path = NULL;
1781         unsigned long ptr;
1782         unsigned long end;
1783         u64 flags;
1784         u64 owner = 0;
1785         u64 offset;
1786         int slot;
1787         int type;
1788         int ret = 0;
1789
1790         /*
1791          * Except file/reloc tree, we can not have FULL BACKREF MODE
1792          */
1793         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1794                 goto normal;
1795
1796         /* root node */
1797         if (eb->start == btrfs_root_bytenr(ri))
1798                 goto normal;
1799
1800         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1801                 goto full_backref;
1802
1803         owner = btrfs_header_owner(eb);
1804         if (owner == root->objectid)
1805                 goto normal;
1806
1807         path = btrfs_alloc_path();
1808         if (!path)
1809                 return -ENOMEM;
1810
1811         key.objectid = btrfs_header_bytenr(eb);
1812         key.type = (u8)-1;
1813         key.offset = (u64)-1;
1814
1815         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1816         if (ret <= 0) {
1817                 ret = -EIO;
1818                 goto out;
1819         }
1820
1821         if (ret > 0) {
1822                 ret = btrfs_previous_extent_item(extent_root, path,
1823                                                  key.objectid);
1824                 if (ret)
1825                         goto full_backref;
1826
1827         }
1828         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1829
1830         eb = path->nodes[0];
1831         slot = path->slots[0];
1832         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1833
1834         flags = btrfs_extent_flags(eb, ei);
1835         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1836                 goto full_backref;
1837
1838         ptr = (unsigned long)(ei + 1);
1839         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
1840
1841         if (key.type == BTRFS_EXTENT_ITEM_KEY)
1842                 ptr += sizeof(struct btrfs_tree_block_info);
1843
1844 next:
1845         /* Reached extent item ends normally */
1846         if (ptr == end)
1847                 goto full_backref;
1848
1849         /* Beyond extent item end, wrong item size */
1850         if (ptr > end) {
1851                 error("extent item at bytenr %llu slot %d has wrong size",
1852                         eb->start, slot);
1853                 goto full_backref;
1854         }
1855
1856         iref = (struct btrfs_extent_inline_ref *)ptr;
1857         offset = btrfs_extent_inline_ref_offset(eb, iref);
1858         type = btrfs_extent_inline_ref_type(eb, iref);
1859
1860         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1861                 goto normal;
1862         ptr += btrfs_extent_inline_ref_size(type);
1863         goto next;
1864
1865 normal:
1866         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1867         goto out;
1868
1869 full_backref:
1870         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1871 out:
1872         btrfs_free_path(path);
1873         return ret;
1874 }
1875
1876 /*
1877  * for a tree node or leaf, we record its reference count, so later if we still
1878  * process this node or leaf, don't need to compute its reference count again.
1879  *
1880  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
1881  */
1882 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1883                              struct extent_buffer *eb, struct node_refs *nrefs,
1884                              u64 level, int check_all)
1885 {
1886         struct ulist *roots;
1887         u64 refs = 0;
1888         u64 flags = 0;
1889         int root_level = btrfs_header_level(root->node);
1890         int check;
1891         int ret;
1892
1893         if (nrefs->bytenr[level] == bytenr)
1894                 return 0;
1895
1896         if (bytenr != (u64)-1) {
1897                 /* the return value of this function seems a mistake */
1898                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1899                                        level, 1, &refs, &flags);
1900                 /* temporary fix */
1901                 if (ret < 0 && !check_all)
1902                         return ret;
1903
1904                 nrefs->bytenr[level] = bytenr;
1905                 nrefs->refs[level] = refs;
1906                 nrefs->full_backref[level] = 0;
1907                 nrefs->checked[level] = 0;
1908
1909                 if (refs > 1) {
1910                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1911                                                    0, &roots);
1912                         if (ret)
1913                                 return -EIO;
1914
1915                         check = need_check(root, roots);
1916                         ulist_free(roots);
1917                         nrefs->need_check[level] = check;
1918                 } else {
1919                         if (!check_all) {
1920                                 nrefs->need_check[level] = 1;
1921                         } else {
1922                                 if (level == root_level) {
1923                                         nrefs->need_check[level] = 1;
1924                                 } else {
1925                                         /*
1926                                          * The node refs may have not been
1927                                          * updated if upper needs checking (the
1928                                          * lowest root_objectid) the node can
1929                                          * be checked.
1930                                          */
1931                                         nrefs->need_check[level] =
1932                                                 nrefs->need_check[level + 1];
1933                                 }
1934                         }
1935                 }
1936         }
1937
1938         if (check_all && eb) {
1939                 calc_extent_flag_v2(root, eb, &flags);
1940                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1941                         nrefs->full_backref[level] = 1;
1942         }
1943
1944         return 0;
1945 }
1946
1947 /*
1948  * @level           if @level == -1 means extent data item
1949  *                  else normal treeblocl.
1950  */
1951 static int should_check_extent_strictly(struct btrfs_root *root,
1952                                         struct node_refs *nrefs, int level)
1953 {
1954         int root_level = btrfs_header_level(root->node);
1955
1956         if (level > root_level || level < -1)
1957                 return 1;
1958         if (level == root_level)
1959                 return 1;
1960         /*
1961          * if the upper node is marked full backref, it should contain shared
1962          * backref of the parent (except owner == root->objectid).
1963          */
1964         while (++level <= root_level)
1965                 if (nrefs->refs[level] > 1)
1966                         return 0;
1967
1968         return 1;
1969 }
1970
/*
 * Walk down the tree from path->nodes[*level], following path->slots[] at
 * each level and decrementing *level on every descent.
 *
 * @nrefs caches the (bytenr, refs) pair last looked up for each level so
 * that the extent tree is not searched again for the same block.
 *
 * The walk stops when the slots of the current level are exhausted, when a
 * leaf has been handed to process_one_leaf(), or when an error occurs.
 * Before returning, the slot of the current level is set to the item count
 * of that block.
 *
 * Returns 0, a negative error, or the positive value returned by
 * enter_shared_node() for the starting block.
 */
1971 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1972                           struct walk_control *wc, int *level,
1973                           struct node_refs *nrefs)
1974 {
1975         enum btrfs_tree_block_status status;
1976         u64 bytenr;
1977         u64 ptr_gen;
1978         struct btrfs_fs_info *fs_info = root->fs_info;
1979         struct extent_buffer *next;
1980         struct extent_buffer *cur;
1981         int ret, err = 0;
1982         u64 refs;
1983
1984         WARN_ON(*level < 0);
1985         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1986
        /*
         * Reference count of the starting block: reuse the cached value if
         * nrefs already describes this block, otherwise look it up and
         * cache it.  A lookup failure here aborts the walk.
         */
1987         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1988                 refs = nrefs->refs[*level];
1989                 ret = 0;
1990         } else {
1991                 ret = btrfs_lookup_extent_info(NULL, root,
1992                                        path->nodes[*level]->start,
1993                                        *level, 1, &refs, NULL);
1994                 if (ret < 0) {
1995                         err = ret;
1996                         goto out;
1997                 }
1998                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1999                 nrefs->refs[*level] = refs;
2000         }
2001
        /*
         * Starting block has more than one reference.  A positive return
         * from enter_shared_node() ends the walk and is passed back to the
         * caller (presumably meaning the shared subtree can be skipped --
         * confirm against enter_shared_node()).
         */
2002         if (refs > 1) {
2003                 ret = enter_shared_node(root, path->nodes[*level]->start,
2004                                         refs, wc, *level);
2005                 if (ret > 0) {
2006                         err = ret;
2007                         goto out;
2008                 }
2009         }
2010
2011         while (*level >= 0) {
2012                 WARN_ON(*level < 0);
2013                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2014                 cur = path->nodes[*level];
2015
2016                 if (btrfs_header_level(cur) != *level)
2017                         WARN_ON(1);
2018
                /* All slots of this block have been consumed */
2019                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2020                         break;
                /* Reached a leaf: process it and stop descending */
2021                 if (*level == 0) {
2022                         ret = process_one_leaf(root, cur, wc);
2023                         if (ret < 0)
2024                                 err = ret;
2025                         break;
2026                 }
2027                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2028                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2029
                /*
                 * Reference count of the child.  Unlike for the starting
                 * block, a failed lookup is tolerated: refs is treated as 0
                 * and nothing is cached.
                 */
2030                 if (bytenr == nrefs->bytenr[*level - 1]) {
2031                         refs = nrefs->refs[*level - 1];
2032                 } else {
2033                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2034                                         *level - 1, 1, &refs, NULL);
2035                         if (ret < 0) {
2036                                 refs = 0;
2037                         } else {
2038                                 nrefs->bytenr[*level - 1] = bytenr;
2039                                 nrefs->refs[*level - 1] = refs;
2040                         }
2041                 }
2042
                /*
                 * Child has more than one reference: on a positive return
                 * from enter_shared_node() skip it and move to the next slot.
                 */
2043                 if (refs > 1) {
2044                         ret = enter_shared_node(root, bytenr, refs,
2045                                                 wc, *level - 1);
2046                         if (ret > 0) {
2047                                 path->slots[*level]++;
2048                                 continue;
2049                         }
2050                 }
2051
                /*
                 * Use the block found by btrfs_find_tree_block() if it is up
                 * to date for ptr_gen; otherwise trigger readahead for the
                 * remaining slots (reada_walk_down() only acts on level 1
                 * nodes) and read the block.
                 */
2052                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2053                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2054                         free_extent_buffer(next);
2055                         reada_walk_down(root, cur, path->slots[*level]);
2056                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2057                         if (!extent_buffer_uptodate(next)) {
2058                                 struct btrfs_key node_key;
2059
                                /*
                                 * Read failed: add a corrupt extent record
                                 * built from the parent's key, start and
                                 * level, then give up.
                                 */
2060                                 btrfs_node_key_to_cpu(path->nodes[*level],
2061                                                       &node_key,
2062                                                       path->slots[*level]);
2063                                 btrfs_add_corrupt_extent_record(root->fs_info,
2064                                                 &node_key,
2065                                                 path->nodes[*level]->start,
2066                                                 root->fs_info->nodesize,
2067                                                 *level);
2068                                 err = -EIO;
2069                                 goto out;
2070                         }
2071                 }
2072
                /* The child must match key, bytenr and generation in the parent */
2073                 ret = check_child_node(cur, path->slots[*level], next);
2074                 if (ret) {
2075                         free_extent_buffer(next);
2076                         err = ret;
2077                         goto out;
2078                 }
2079
                /* Validate the child block itself before descending */
2080                 if (btrfs_is_leaf(next))
2081                         status = btrfs_check_leaf(root, NULL, next);
2082                 else
2083                         status = btrfs_check_node(root, NULL, next);
2084                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2085                         free_extent_buffer(next);
2086                         err = -EIO;
2087                         goto out;
2088                 }
2089
                /*
                 * Descend: release whatever the path held one level down,
                 * install the child there and start at its first slot.
                 */
2090                 *level = *level - 1;
2091                 free_extent_buffer(path->nodes[*level]);
2092                 path->nodes[*level] = next;
2093                 path->slots[*level] = 0;
2094         }
2095 out:
        /* Mark the current level as fully consumed */
2096         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2097         return err;
2098 }
2099
2100 /*
2101  * Update global fs information.
2102  */
2103 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2104                          int level)
2105 {
2106         u32 free_nrs;
2107         struct extent_buffer *eb = path->nodes[level];
2108
2109         total_btree_bytes += eb->len;
2110         if (fs_root_objectid(root->objectid))
2111                 total_fs_tree_bytes += eb->len;
2112         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2113                 total_extent_tree_bytes += eb->len;
2114
2115         if (level == 0) {
2116                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2117         } else {
2118                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2119                             btrfs_header_nritems(eb));
2120                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2121         }
2122 }
2123
2124 /*
2125  * This function only handles BACKREF_MISSING,
2126  * If corresponding extent item exists, increase the ref, else insert an extent
2127  * item and backref.
2128  *
2129  * Returns error bits after repair.
2130  */
2131 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2132                                  struct btrfs_root *root,
2133                                  struct extent_buffer *node,
2134                                  struct node_refs *nrefs, int level, int err)
2135 {
2136         struct btrfs_fs_info *fs_info = root->fs_info;
2137         struct btrfs_root *extent_root = fs_info->extent_root;
2138         struct btrfs_path path;
2139         struct btrfs_extent_item *ei;
2140         struct btrfs_tree_block_info *bi;
2141         struct btrfs_key key;
2142         struct extent_buffer *eb;
2143         u32 size = sizeof(*ei);
2144         u32 node_size = root->fs_info->nodesize;
2145         int insert_extent = 0;
2146         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2147         int root_level = btrfs_header_level(root->node);
2148         int generation;
2149         int ret;
2150         u64 owner;
2151         u64 bytenr;
2152         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2153         u64 parent = 0;
2154
2155         if ((err & BACKREF_MISSING) == 0)
2156                 return err;
2157
2158         WARN_ON(level > BTRFS_MAX_LEVEL);
2159         WARN_ON(level < 0);
2160
2161         btrfs_init_path(&path);
2162         bytenr = btrfs_header_bytenr(node);
2163         owner = btrfs_header_owner(node);
2164         generation = btrfs_header_generation(node);
2165
2166         key.objectid = bytenr;
2167         key.type = (u8)-1;
2168         key.offset = (u64)-1;
2169
2170         /* Search for the extent item */
2171         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2172         if (ret <= 0) {
2173                 ret = -EIO;
2174                 goto out;
2175         }
2176
2177         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2178         if (ret)
2179                 insert_extent = 1;
2180
2181         /* calculate if the extent item flag is full backref or not */
2182         if (nrefs->full_backref[level] != 0)
2183                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2184
2185         /* insert an extent item */
2186         if (insert_extent) {
2187                 struct btrfs_disk_key copy_key;
2188
2189                 generation = btrfs_header_generation(node);
2190
2191                 if (level < root_level && nrefs->full_backref[level + 1] &&
2192                     owner != root->objectid) {
2193                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2194                 }
2195
2196                 key.objectid = bytenr;
2197                 if (!skinny_metadata) {
2198                         key.type = BTRFS_EXTENT_ITEM_KEY;
2199                         key.offset = node_size;
2200                         size += sizeof(*bi);
2201                 } else {
2202                         key.type = BTRFS_METADATA_ITEM_KEY;
2203                         key.offset = level;
2204                 }
2205
2206                 btrfs_release_path(&path);
2207                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2208                                               size);
2209                 if (ret)
2210                         goto out;
2211
2212                 eb = path.nodes[0];
2213                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2214
2215                 btrfs_set_extent_refs(eb, ei, 0);
2216                 btrfs_set_extent_generation(eb, ei, generation);
2217                 btrfs_set_extent_flags(eb, ei, flags);
2218
2219                 if (!skinny_metadata) {
2220                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2221                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2222                                              sizeof(*bi));
2223                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2224                         btrfs_set_disk_key_type(&copy_key, 0);
2225                         btrfs_set_disk_key_offset(&copy_key, 0);
2226
2227                         btrfs_set_tree_block_level(eb, bi, level);
2228                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2229                 }
2230                 btrfs_mark_buffer_dirty(eb);
2231                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2232                 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2233
2234                 nrefs->refs[level] = 0;
2235                 nrefs->full_backref[level] =
2236                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2237                 btrfs_release_path(&path);
2238         }
2239
2240         if (level < root_level && nrefs->full_backref[level + 1] &&
2241             owner != root->objectid)
2242                 parent = nrefs->bytenr[level + 1];
2243
2244         /* increase the ref */
2245         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2246                         parent, root->objectid, level, 0);
2247
2248         nrefs->refs[level]++;
2249 out:
2250         btrfs_release_path(&path);
2251         if (ret) {
2252                 error(
2253         "failed to repair tree block ref start %llu root %llu due to %s",
2254                       bytenr, root->objectid, strerror(-ret));
2255         } else {
2256                 printf("Added one tree block ref start %llu %s %llu\n",
2257                        bytenr, parent ? "parent" : "root",
2258                        parent ? parent : root->objectid);
2259                 err &= ~BACKREF_MISSING;
2260         }
2261
2262         return err;
2263 }
2264
/*
 * Forward declarations for helpers used by walk_down_tree_v2() below.
 * NOTE(review): check_inode_item() is already declared earlier in this
 * file, ahead of process_one_leaf_v2(); this second declaration looks
 * redundant.
 */
2265 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2266                             unsigned int ext_ref);
2267 static int check_tree_block_ref(struct btrfs_root *root,
2268                                 struct extent_buffer *eb, u64 bytenr,
2269                                 int level, u64 owner, struct node_refs *nrefs);
2270 static int check_leaf_items(struct btrfs_trans_handle *trans,
2271                             struct btrfs_root *root, struct btrfs_path *path,
2272                             struct node_refs *nrefs, int account_bytes);
2273
2274 /*
2275  * @trans      just for lowmem repair mode
2276  * @check_all  if not 0 then check all tree block backrefs and items
2277  *             0 then just check relationship of items in fs tree(s)
2278  *
2279  * Returns >0  Found error, should continue
2280  * Returns <0  Fatal error, must exit the whole check
2281  * Returns 0   No errors found
2282  */
2283 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2284                              struct btrfs_root *root, struct btrfs_path *path,
2285                              int *level, struct node_refs *nrefs, int ext_ref,
2286                              int check_all)
2287
2288 {
2289         enum btrfs_tree_block_status status;
2290         u64 bytenr;
2291         u64 ptr_gen;
2292         struct btrfs_fs_info *fs_info = root->fs_info;
2293         struct extent_buffer *next;
2294         struct extent_buffer *cur;
2295         int ret;
2296         int err = 0;
2297         int check;
2298         int account_file_data = 0;
2299
2300         WARN_ON(*level < 0);
2301         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2302
2303         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2304                                 path->nodes[*level], nrefs, *level, check_all);
2305         if (ret < 0)
2306                 return ret;
2307
2308         while (*level >= 0) {
2309                 WARN_ON(*level < 0);
2310                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2311                 cur = path->nodes[*level];
2312                 bytenr = btrfs_header_bytenr(cur);
2313                 check = nrefs->need_check[*level];
2314
2315                 if (btrfs_header_level(cur) != *level)
2316                         WARN_ON(1);
2317                /*
2318                 * Update bytes accounting and check tree block ref
2319                 * NOTE: Doing accounting and check before checking nritems
2320                 * is necessary because of empty node/leaf.
2321                 */
2322                 if ((check_all && !nrefs->checked[*level]) ||
2323                     (!check_all && nrefs->need_check[*level])) {
2324                         ret = check_tree_block_ref(root, cur,
2325                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2326                            btrfs_header_owner(cur), nrefs);
2327
2328                         if (repair && ret)
2329                                 ret = repair_tree_block_ref(trans, root,
2330                                     path->nodes[*level], nrefs, *level, ret);
2331                         err |= ret;
2332
2333                         if (check_all && nrefs->need_check[*level] &&
2334                                 nrefs->refs[*level]) {
2335                                 account_bytes(root, path, *level);
2336                                 account_file_data = 1;
2337                         }
2338                         nrefs->checked[*level] = 1;
2339                 }
2340
2341                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2342                         break;
2343
2344                 /* Don't forgot to check leaf/node validation */
2345                 if (*level == 0) {
2346                         /* skip duplicate check */
2347                         if (check || !check_all) {
2348                                 ret = btrfs_check_leaf(root, NULL, cur);
2349                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2350                                         err |= -EIO;
2351                                         break;
2352                                 }
2353                         }
2354
2355                         ret = 0;
2356                         if (!check_all)
2357                                 ret = process_one_leaf_v2(root, path, nrefs,
2358                                                           level, ext_ref);
2359                         else
2360                                 ret = check_leaf_items(trans, root, path,
2361                                                nrefs, account_file_data);
2362                         err |= ret;
2363                         break;
2364                 } else {
2365                         if (check || !check_all) {
2366                                 ret = btrfs_check_node(root, NULL, cur);
2367                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2368                                         err |= -EIO;
2369                                         break;
2370                                 }
2371                         }
2372                 }
2373
2374                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2375                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2376
2377                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2378                                         check_all);
2379                 if (ret < 0)
2380                         break;
2381                 /*
2382                  * check all trees in check_chunks_and_extent_v2
2383                  * check shared node once in check_fs_roots
2384                  */
2385                 if (!check_all && !nrefs->need_check[*level - 1]) {
2386                         path->slots[*level]++;
2387                         continue;
2388                 }
2389
2390                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392                         free_extent_buffer(next);
2393                         reada_walk_down(root, cur, path->slots[*level]);
2394                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2395                         if (!extent_buffer_uptodate(next)) {
2396                                 struct btrfs_key node_key;
2397
2398                                 btrfs_node_key_to_cpu(path->nodes[*level],
2399                                                       &node_key,
2400                                                       path->slots[*level]);
2401                                 btrfs_add_corrupt_extent_record(fs_info,
2402                                         &node_key, path->nodes[*level]->start,
2403                                         fs_info->nodesize, *level);
2404                                 err |= -EIO;
2405                                 break;
2406                         }
2407                 }
2408
2409                 ret = check_child_node(cur, path->slots[*level], next);
2410                 err |= ret;
2411                 if (ret < 0) 
2412                         break;
2413
2414                 if (btrfs_is_leaf(next))
2415                         status = btrfs_check_leaf(root, NULL, next);
2416                 else
2417                         status = btrfs_check_node(root, NULL, next);
2418                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2419                         free_extent_buffer(next);
2420                         err |= -EIO;
2421                         break;
2422                 }
2423
2424                 *level = *level - 1;
2425                 free_extent_buffer(path->nodes[*level]);
2426                 path->nodes[*level] = next;
2427                 path->slots[*level] = 0;
2428                 account_file_data = 0;
2429
2430                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2431         }
2432         return err;
2433 }
2434
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436                         struct walk_control *wc, int *level)
2437 {
2438         int i;
2439         struct extent_buffer *leaf;
2440
2441         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442                 leaf = path->nodes[i];
2443                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2444                         path->slots[i]++;
2445                         *level = i;
2446                         return 0;
2447                 } else {
2448                         free_extent_buffer(path->nodes[*level]);
2449                         path->nodes[*level] = NULL;
2450                         BUG_ON(*level > wc->active_node);
2451                         if (*level == wc->active_node)
2452                                 leave_shared_node(root, wc, *level);
2453                         *level = i + 1;
2454                 }
2455         }
2456         return 1;
2457 }
2458
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2460                            int *level)
2461 {
2462         int i;
2463         struct extent_buffer *leaf;
2464
2465         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466                 leaf = path->nodes[i];
2467                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2468                         path->slots[i]++;
2469                         *level = i;
2470                         return 0;
2471                 } else {
2472                         free_extent_buffer(path->nodes[*level]);
2473                         path->nodes[*level] = NULL;
2474                         *level = i + 1;
2475                 }
2476         }
2477         return 1;
2478 }
2479
2480 static int check_root_dir(struct inode_record *rec)
2481 {
2482         struct inode_backref *backref;
2483         int ret = -1;
2484
2485         if (!rec->found_inode_item || rec->errors)
2486                 goto out;
2487         if (rec->nlink != 1 || rec->found_link != 0)
2488                 goto out;
2489         if (list_empty(&rec->backrefs))
2490                 goto out;
2491         backref = to_inode_backref(rec->backrefs.next);
2492         if (!backref->found_inode_ref)
2493                 goto out;
2494         if (backref->index != 0 || backref->namelen != 2 ||
2495             memcmp(backref->name, "..", 2))
2496                 goto out;
2497         if (backref->found_dir_index || backref->found_dir_item)
2498                 goto out;
2499         ret = 0;
2500 out:
2501         return ret;
2502 }
2503
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505                               struct btrfs_root *root, struct btrfs_path *path,
2506                               struct inode_record *rec)
2507 {
2508         struct btrfs_inode_item *ei;
2509         struct btrfs_key key;
2510         int ret;
2511
2512         key.objectid = rec->ino;
2513         key.type = BTRFS_INODE_ITEM_KEY;
2514         key.offset = (u64)-1;
2515
2516         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2517         if (ret < 0)
2518                 goto out;
2519         if (ret) {
2520                 if (!path->slots[0]) {
2521                         ret = -ENOENT;
2522                         goto out;
2523                 }
2524                 path->slots[0]--;
2525                 ret = 0;
2526         }
2527         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528         if (key.objectid != rec->ino) {
2529                 ret = -ENOENT;
2530                 goto out;
2531         }
2532
2533         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534                             struct btrfs_inode_item);
2535         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536         btrfs_mark_buffer_dirty(path->nodes[0]);
2537         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539                root->root_key.objectid);
2540 out:
2541         btrfs_release_path(path);
2542         return ret;
2543 }
2544
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546                                     struct btrfs_root *root,
2547                                     struct btrfs_path *path,
2548                                     struct inode_record *rec)
2549 {
2550         int ret;
2551
2552         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553         btrfs_release_path(path);
2554         if (!ret)
2555                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2556         return ret;
2557 }
2558
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560                                struct btrfs_root *root,
2561                                struct btrfs_path *path,
2562                                struct inode_record *rec)
2563 {
2564         struct btrfs_inode_item *ei;
2565         struct btrfs_key key;
2566         int ret = 0;
2567
2568         key.objectid = rec->ino;
2569         key.type = BTRFS_INODE_ITEM_KEY;
2570         key.offset = 0;
2571
2572         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2573         if (ret) {
2574                 if (ret > 0)
2575                         ret = -ENOENT;
2576                 goto out;
2577         }
2578
2579         /* Since ret == 0, no need to check anything */
2580         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581                             struct btrfs_inode_item);
2582         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583         btrfs_mark_buffer_dirty(path->nodes[0]);
2584         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585         printf("reset nbytes for ino %llu root %llu\n",
2586                rec->ino, root->root_key.objectid);
2587 out:
2588         btrfs_release_path(path);
2589         return ret;
2590 }
2591
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593                                  struct cache_tree *inode_cache,
2594                                  struct inode_record *rec,
2595                                  struct inode_backref *backref)
2596 {
2597         struct btrfs_path path;
2598         struct btrfs_trans_handle *trans;
2599         struct btrfs_dir_item *dir_item;
2600         struct extent_buffer *leaf;
2601         struct btrfs_key key;
2602         struct btrfs_disk_key disk_key;
2603         struct inode_record *dir_rec;
2604         unsigned long name_ptr;
2605         u32 data_size = sizeof(*dir_item) + backref->namelen;
2606         int ret;
2607
2608         trans = btrfs_start_transaction(root, 1);
2609         if (IS_ERR(trans))
2610                 return PTR_ERR(trans);
2611
2612         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613                 (unsigned long long)rec->ino);
2614
2615         btrfs_init_path(&path);
2616         key.objectid = backref->dir;
2617         key.type = BTRFS_DIR_INDEX_KEY;
2618         key.offset = backref->index;
2619         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620         BUG_ON(ret);
2621
2622         leaf = path.nodes[0];
2623         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2624
2625         disk_key.objectid = cpu_to_le64(rec->ino);
2626         disk_key.type = BTRFS_INODE_ITEM_KEY;
2627         disk_key.offset = 0;
2628
2629         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631         btrfs_set_dir_data_len(leaf, dir_item, 0);
2632         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2633         name_ptr = (unsigned long)(dir_item + 1);
2634         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635         btrfs_mark_buffer_dirty(leaf);
2636         btrfs_release_path(&path);
2637         btrfs_commit_transaction(trans, root);
2638
2639         backref->found_dir_index = 1;
2640         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641         BUG_ON(IS_ERR(dir_rec));
2642         if (!dir_rec)
2643                 return 0;
2644         dir_rec->found_size += backref->namelen;
2645         if (dir_rec->found_size == dir_rec->isize &&
2646             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648         if (dir_rec->found_size != dir_rec->isize)
2649                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2650
2651         return 0;
2652 }
2653
2654 static int delete_dir_index(struct btrfs_root *root,
2655                             struct inode_backref *backref)
2656 {
2657         struct btrfs_trans_handle *trans;
2658         struct btrfs_dir_item *di;
2659         struct btrfs_path path;
2660         int ret = 0;
2661
2662         trans = btrfs_start_transaction(root, 1);
2663         if (IS_ERR(trans))
2664                 return PTR_ERR(trans);
2665
2666         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667                 (unsigned long long)backref->dir,
2668                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669                 (unsigned long long)root->objectid);
2670
2671         btrfs_init_path(&path);
2672         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673                                     backref->name, backref->namelen,
2674                                     backref->index, -1);
2675         if (IS_ERR(di)) {
2676                 ret = PTR_ERR(di);
2677                 btrfs_release_path(&path);
2678                 btrfs_commit_transaction(trans, root);
2679                 if (ret == -ENOENT)
2680                         return 0;
2681                 return ret;
2682         }
2683
2684         if (!di)
2685                 ret = btrfs_del_item(trans, root, &path);
2686         else
2687                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2688         BUG_ON(ret);
2689         btrfs_release_path(&path);
2690         btrfs_commit_transaction(trans, root);
2691         return ret;
2692 }
2693
2694 static int __create_inode_item(struct btrfs_trans_handle *trans,
2695                                struct btrfs_root *root, u64 ino, u64 size,
2696                                u64 nbytes, u64 nlink, u32 mode)
2697 {
2698         struct btrfs_inode_item ii;
2699         time_t now = time(NULL);
2700         int ret;
2701
2702         btrfs_set_stack_inode_size(&ii, size);
2703         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2704         btrfs_set_stack_inode_nlink(&ii, nlink);
2705         btrfs_set_stack_inode_mode(&ii, mode);
2706         btrfs_set_stack_inode_generation(&ii, trans->transid);
2707         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2708         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2709         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2710         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2711         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2712         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2713         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2714
2715         ret = btrfs_insert_inode(trans, root, ino, &ii);
2716         ASSERT(!ret);
2717
2718         warning("root %llu inode %llu recreating inode item, this may "
2719                 "be incomplete, please check permissions and content after "
2720                 "the fsck completes.\n", (unsigned long long)root->objectid,
2721                 (unsigned long long)ino);
2722
2723         return 0;
2724 }
2725
2726 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2727                                     struct btrfs_root *root, u64 ino,
2728                                     u8 filetype)
2729 {
2730         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2731
2732         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2733 }
2734
2735 static int create_inode_item(struct btrfs_root *root,
2736                              struct inode_record *rec, int root_dir)
2737 {
2738         struct btrfs_trans_handle *trans;
2739         u64 nlink = 0;
2740         u32 mode = 0;
2741         u64 size = 0;
2742         int ret;
2743
2744         trans = btrfs_start_transaction(root, 1);
2745         if (IS_ERR(trans)) {
2746                 ret = PTR_ERR(trans);
2747                 return ret;
2748         }
2749
2750         nlink = root_dir ? 1 : rec->found_link;
2751         if (rec->found_dir_item) {
2752                 if (rec->found_file_extent)
2753                         fprintf(stderr, "root %llu inode %llu has both a dir "
2754                                 "item and extents, unsure if it is a dir or a "
2755                                 "regular file so setting it as a directory\n",
2756                                 (unsigned long long)root->objectid,
2757                                 (unsigned long long)rec->ino);
2758                 mode = S_IFDIR | 0755;
2759                 size = rec->found_size;
2760         } else if (!rec->found_dir_item) {
2761                 size = rec->extent_end;
2762                 mode =  S_IFREG | 0755;
2763         }
2764
2765         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2766                                   nlink, mode);
2767         btrfs_commit_transaction(trans, root);
2768         return 0;
2769 }
2770
2771 static int repair_inode_backrefs(struct btrfs_root *root,
2772                                  struct inode_record *rec,
2773                                  struct cache_tree *inode_cache,
2774                                  int delete)
2775 {
2776         struct inode_backref *tmp, *backref;
2777         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2778         int ret = 0;
2779         int repaired = 0;
2780
2781         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2782                 if (!delete && rec->ino == root_dirid) {
2783                         if (!rec->found_inode_item) {
2784                                 ret = create_inode_item(root, rec, 1);
2785                                 if (ret)
2786                                         break;
2787                                 repaired++;
2788                         }
2789                 }
2790
2791                 /* Index 0 for root dir's are special, don't mess with it */
2792                 if (rec->ino == root_dirid && backref->index == 0)
2793                         continue;
2794
2795                 if (delete &&
2796                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2797                      (backref->found_dir_index && backref->found_inode_ref &&
2798                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2799                         ret = delete_dir_index(root, backref);
2800                         if (ret)
2801                                 break;
2802                         repaired++;
2803                         list_del(&backref->list);
2804                         free(backref);
2805                         continue;
2806                 }
2807
2808                 if (!delete && !backref->found_dir_index &&
2809                     backref->found_dir_item && backref->found_inode_ref) {
2810                         ret = add_missing_dir_index(root, inode_cache, rec,
2811                                                     backref);
2812                         if (ret)
2813                                 break;
2814                         repaired++;
2815                         if (backref->found_dir_item &&
2816                             backref->found_dir_index) {
2817                                 if (!backref->errors &&
2818                                     backref->found_inode_ref) {
2819                                         list_del(&backref->list);
2820                                         free(backref);
2821                                         continue;
2822                                 }
2823                         }
2824                 }
2825
2826                 if (!delete && (!backref->found_dir_index &&
2827                                 !backref->found_dir_item &&
2828                                 backref->found_inode_ref)) {
2829                         struct btrfs_trans_handle *trans;
2830                         struct btrfs_key location;
2831
2832                         ret = check_dir_conflict(root, backref->name,
2833                                                  backref->namelen,
2834                                                  backref->dir,
2835                                                  backref->index);
2836                         if (ret) {
2837                                 /*
2838                                  * let nlink fixing routine to handle it,
2839                                  * which can do it better.
2840                                  */
2841                                 ret = 0;
2842                                 break;
2843                         }
2844                         location.objectid = rec->ino;
2845                         location.type = BTRFS_INODE_ITEM_KEY;
2846                         location.offset = 0;
2847
2848                         trans = btrfs_start_transaction(root, 1);
2849                         if (IS_ERR(trans)) {
2850                                 ret = PTR_ERR(trans);
2851                                 break;
2852                         }
2853                         fprintf(stderr, "adding missing dir index/item pair "
2854                                 "for inode %llu\n",
2855                                 (unsigned long long)rec->ino);
2856                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2857                                                     backref->namelen,
2858                                                     backref->dir, &location,
2859                                                     imode_to_type(rec->imode),
2860                                                     backref->index);
2861                         BUG_ON(ret);
2862                         btrfs_commit_transaction(trans, root);
2863                         repaired++;
2864                 }
2865
2866                 if (!delete && (backref->found_inode_ref &&
2867                                 backref->found_dir_index &&
2868                                 backref->found_dir_item &&
2869                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2870                                 !rec->found_inode_item)) {
2871                         ret = create_inode_item(root, rec, 0);
2872                         if (ret)
2873                                 break;
2874                         repaired++;
2875                 }
2876
2877         }
2878         return ret ? ret : repaired;
2879 }
2880
2881 /*
2882  * To determine the file type for nlink/inode_item repair
2883  *
2884  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2885  * Return -ENOENT if file type is not found.
2886  */
2887 static int find_file_type(struct inode_record *rec, u8 *type)
2888 {
2889         struct inode_backref *backref;
2890
2891         /* For inode item recovered case */
2892         if (rec->found_inode_item) {
2893                 *type = imode_to_type(rec->imode);
2894                 return 0;
2895         }
2896
2897         list_for_each_entry(backref, &rec->backrefs, list) {
2898                 if (backref->found_dir_index || backref->found_dir_item) {
2899                         *type = backref->filetype;
2900                         return 0;
2901                 }
2902         }
2903         return -ENOENT;
2904 }
2905
2906 /*
2907  * To determine the file name for nlink repair
2908  *
2909  * Return 0 if file name is found, set name and namelen.
2910  * Return -ENOENT if file name is not found.
2911  */
2912 static int find_file_name(struct inode_record *rec,
2913                           char *name, int *namelen)
2914 {
2915         struct inode_backref *backref;
2916
2917         list_for_each_entry(backref, &rec->backrefs, list) {
2918                 if (backref->found_dir_index || backref->found_dir_item ||
2919                     backref->found_inode_ref) {
2920                         memcpy(name, backref->name, backref->namelen);
2921                         *namelen = backref->namelen;
2922                         return 0;
2923                 }
2924         }
2925         return -ENOENT;
2926 }
2927
2928 /* Reset the nlink of the inode to the correct one */
2929 static int reset_nlink(struct btrfs_trans_handle *trans,
2930                        struct btrfs_root *root,
2931                        struct btrfs_path *path,
2932                        struct inode_record *rec)
2933 {
2934         struct inode_backref *backref;
2935         struct inode_backref *tmp;
2936         struct btrfs_key key;
2937         struct btrfs_inode_item *inode_item;
2938         int ret = 0;
2939
2940         /* We don't believe this either, reset it and iterate backref */
2941         rec->found_link = 0;
2942
2943         /* Remove all backref including the valid ones */
2944         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2945                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2946                                    backref->index, backref->name,
2947                                    backref->namelen, 0);
2948                 if (ret < 0)
2949                         goto out;
2950
2951                 /* remove invalid backref, so it won't be added back */
2952                 if (!(backref->found_dir_index &&
2953                       backref->found_dir_item &&
2954                       backref->found_inode_ref)) {
2955                         list_del(&backref->list);
2956                         free(backref);
2957                 } else {
2958                         rec->found_link++;
2959                 }
2960         }
2961
2962         /* Set nlink to 0 */
2963         key.objectid = rec->ino;
2964         key.type = BTRFS_INODE_ITEM_KEY;
2965         key.offset = 0;
2966         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2967         if (ret < 0)
2968                 goto out;
2969         if (ret > 0) {
2970                 ret = -ENOENT;
2971                 goto out;
2972         }
2973         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2974                                     struct btrfs_inode_item);
2975         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2976         btrfs_mark_buffer_dirty(path->nodes[0]);
2977         btrfs_release_path(path);
2978
2979         /*
2980          * Add back valid inode_ref/dir_item/dir_index,
2981          * add_link() will handle the nlink inc, so new nlink must be correct
2982          */
2983         list_for_each_entry(backref, &rec->backrefs, list) {
2984                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2985                                      backref->name, backref->namelen,
2986                                      backref->filetype, &backref->index, 1, 0);
2987                 if (ret < 0)
2988                         goto out;
2989         }
2990 out:
2991         btrfs_release_path(path);
2992         return ret;
2993 }
2994
2995 static int get_highest_inode(struct btrfs_trans_handle *trans,
2996                                 struct btrfs_root *root,
2997                                 struct btrfs_path *path,
2998                                 u64 *highest_ino)
2999 {
3000         struct btrfs_key key, found_key;
3001         int ret;
3002
3003         btrfs_init_path(path);
3004         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3005         key.offset = -1;
3006         key.type = BTRFS_INODE_ITEM_KEY;
3007         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3008         if (ret == 1) {
3009                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3010                                 path->slots[0] - 1);
3011                 *highest_ino = found_key.objectid;
3012                 ret = 0;
3013         }
3014         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3015                 ret = -EOVERFLOW;
3016         btrfs_release_path(path);
3017         return ret;
3018 }
3019
3020 /*
3021  * Link inode to dir 'lost+found'. Increase @ref_count.
3022  *
3023  * Returns 0 means success.
3024  * Returns <0 means failure.
3025  */
3026 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3027                                    struct btrfs_root *root,
3028                                    struct btrfs_path *path,
3029                                    u64 ino, char *namebuf, u32 name_len,
3030                                    u8 filetype, u64 *ref_count)
3031 {
3032         char *dir_name = "lost+found";
3033         u64 lost_found_ino;
3034         int ret;
3035         u32 mode = 0700;
3036
3037         btrfs_release_path(path);
3038         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3039         if (ret < 0)
3040                 goto out;
3041         lost_found_ino++;
3042
3043         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3044                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3045                           mode);
3046         if (ret < 0) {
3047                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3048                 goto out;
3049         }
3050         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3051                              namebuf, name_len, filetype, NULL, 1, 0);
3052         /*
3053          * Add ".INO" suffix several times to handle case where
3054          * "FILENAME.INO" is already taken by another file.
3055          */
3056         while (ret == -EEXIST) {
3057                 /*
3058                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3059                  */
3060                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3061                         ret = -EFBIG;
3062                         goto out;
3063                 }
3064                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3065                          ".%llu", ino);
3066                 name_len += count_digits(ino) + 1;
3067                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3068                                      name_len, filetype, NULL, 1, 0);
3069         }
3070         if (ret < 0) {
3071                 error("failed to link the inode %llu to %s dir: %s",
3072                       ino, dir_name, strerror(-ret));
3073                 goto out;
3074         }
3075
3076         ++*ref_count;
3077         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3078                name_len, namebuf, dir_name);
3079 out:
3080         btrfs_release_path(path);
3081         if (ret)
3082                 error("failed to move file '%.*s' to '%s' dir", name_len,
3083                                 namebuf, dir_name);
3084         return ret;
3085 }
3086
3087 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3088                                struct btrfs_root *root,
3089                                struct btrfs_path *path,
3090                                struct inode_record *rec)
3091 {
3092         char namebuf[BTRFS_NAME_LEN] = {0};
3093         u8 type = 0;
3094         int namelen = 0;
3095         int name_recovered = 0;
3096         int type_recovered = 0;
3097         int ret = 0;
3098
3099         /*
3100          * Get file name and type first before these invalid inode ref
3101          * are deleted by remove_all_invalid_backref()
3102          */
3103         name_recovered = !find_file_name(rec, namebuf, &namelen);
3104         type_recovered = !find_file_type(rec, &type);
3105
3106         if (!name_recovered) {
3107                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3108                        rec->ino, rec->ino);
3109                 namelen = count_digits(rec->ino);
3110                 sprintf(namebuf, "%llu", rec->ino);
3111                 name_recovered = 1;
3112         }
3113         if (!type_recovered) {
3114                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3115                        rec->ino);
3116                 type = BTRFS_FT_REG_FILE;
3117                 type_recovered = 1;
3118         }
3119
3120         ret = reset_nlink(trans, root, path, rec);
3121         if (ret < 0) {
3122                 fprintf(stderr,
3123                         "Failed to reset nlink for inode %llu: %s\n",
3124                         rec->ino, strerror(-ret));
3125                 goto out;
3126         }
3127
3128         if (rec->found_link == 0) {
3129                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3130                                               namebuf, namelen, type,
3131                                               (u64 *)&rec->found_link);
3132                 if (ret)
3133                         goto out;
3134         }
3135         printf("Fixed the nlink of inode %llu\n", rec->ino);
3136 out:
3137         /*
3138          * Clear the flag anyway, or we will loop forever for the same inode
3139          * as it will not be removed from the bad inode list and the dead loop
3140          * happens.
3141          */
3142         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3143         btrfs_release_path(path);
3144         return ret;
3145 }
3146
3147 /*
3148  * Check if there is any normal(reg or prealloc) file extent for given
3149  * ino.
3150  * This is used to determine the file type when neither its dir_index/item or
3151  * inode_item exists.
3152  *
3153  * This will *NOT* report error, if any error happens, just consider it does
3154  * not have any normal file extent.
3155  */
3156 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3157 {
3158         struct btrfs_path path;
3159         struct btrfs_key key;
3160         struct btrfs_key found_key;
3161         struct btrfs_file_extent_item *fi;
3162         u8 type;
3163         int ret = 0;
3164
3165         btrfs_init_path(&path);
3166         key.objectid = ino;
3167         key.type = BTRFS_EXTENT_DATA_KEY;
3168         key.offset = 0;
3169
3170         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3171         if (ret < 0) {
3172                 ret = 0;
3173                 goto out;
3174         }
3175         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3176                 ret = btrfs_next_leaf(root, &path);
3177                 if (ret) {
3178                         ret = 0;
3179                         goto out;
3180                 }
3181         }
3182         while (1) {
3183                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3184                                       path.slots[0]);
3185                 if (found_key.objectid != ino ||
3186                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3187                         break;
3188                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3189                                     struct btrfs_file_extent_item);
3190                 type = btrfs_file_extent_type(path.nodes[0], fi);
3191                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3192                         ret = 1;
3193                         goto out;
3194                 }
3195         }
3196 out:
3197         btrfs_release_path(&path);
3198         return ret;
3199 }
3200
3201 static u32 btrfs_type_to_imode(u8 type)
3202 {
3203         static u32 imode_by_btrfs_type[] = {
3204                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3205                 [BTRFS_FT_DIR]          = S_IFDIR,
3206                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3207                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3208                 [BTRFS_FT_FIFO]         = S_IFIFO,
3209                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3210                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3211         };
3212
3213         return imode_by_btrfs_type[(type)];
3214 }
3215
3216 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3217                                 struct btrfs_root *root,
3218                                 struct btrfs_path *path,
3219                                 struct inode_record *rec)
3220 {
3221         u8 filetype;
3222         u32 mode = 0700;
3223         int type_recovered = 0;
3224         int ret = 0;
3225
3226         printf("Trying to rebuild inode:%llu\n", rec->ino);
3227
3228         type_recovered = !find_file_type(rec, &filetype);
3229
3230         /*
3231          * Try to determine inode type if type not found.
3232          *
3233          * For found regular file extent, it must be FILE.
3234          * For found dir_item/index, it must be DIR.
3235          *
3236          * For undetermined one, use FILE as fallback.
3237          *
3238          * TODO:
3239          * 1. If found backref(inode_index/item is already handled) to it,
3240          *    it must be DIR.
3241          *    Need new inode-inode ref structure to allow search for that.
3242          */
3243         if (!type_recovered) {
3244                 if (rec->found_file_extent &&
3245                     find_normal_file_extent(root, rec->ino)) {
3246                         type_recovered = 1;
3247                         filetype = BTRFS_FT_REG_FILE;
3248                 } else if (rec->found_dir_item) {
3249                         type_recovered = 1;
3250                         filetype = BTRFS_FT_DIR;
3251                 } else if (!list_empty(&rec->orphan_extents)) {
3252                         type_recovered = 1;
3253                         filetype = BTRFS_FT_REG_FILE;
3254                 } else{
3255                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3256                                rec->ino);
3257                         type_recovered = 1;
3258                         filetype = BTRFS_FT_REG_FILE;
3259                 }
3260         }
3261
3262         ret = btrfs_new_inode(trans, root, rec->ino,
3263                               mode | btrfs_type_to_imode(filetype));
3264         if (ret < 0)
3265                 goto out;
3266
3267         /*
3268          * Here inode rebuild is done, we only rebuild the inode item,
3269          * don't repair the nlink(like move to lost+found).
3270          * That is the job of nlink repair.
3271          *
3272          * We just fill the record and return
3273          */
3274         rec->found_dir_item = 1;
3275         rec->imode = mode | btrfs_type_to_imode(filetype);
3276         rec->nlink = 0;
3277         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3278         /* Ensure the inode_nlinks repair function will be called */
3279         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3280 out:
3281         return ret;
3282 }
3283
3284 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3285                                       struct btrfs_root *root,
3286                                       struct btrfs_path *path,
3287                                       struct inode_record *rec)
3288 {
3289         struct orphan_data_extent *orphan;
3290         struct orphan_data_extent *tmp;
3291         int ret = 0;
3292
3293         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3294                 /*
3295                  * Check for conflicting file extents
3296                  *
3297                  * Here we don't know whether the extents is compressed or not,
3298                  * so we can only assume it not compressed nor data offset,
3299                  * and use its disk_len as extent length.
3300                  */
3301                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3302                                        orphan->offset, orphan->disk_len, 0);
3303                 btrfs_release_path(path);
3304                 if (ret < 0)
3305                         goto out;
3306                 if (!ret) {
3307                         fprintf(stderr,
3308                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3309                                 orphan->disk_bytenr, orphan->disk_len);
3310                         ret = btrfs_free_extent(trans,
3311                                         root->fs_info->extent_root,
3312                                         orphan->disk_bytenr, orphan->disk_len,
3313                                         0, root->objectid, orphan->objectid,
3314                                         orphan->offset);
3315                         if (ret < 0)
3316                                 goto out;
3317                 }
3318                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3319                                 orphan->offset, orphan->disk_bytenr,
3320                                 orphan->disk_len, orphan->disk_len);
3321                 if (ret < 0)
3322                         goto out;
3323
3324                 /* Update file size info */
3325                 rec->found_size += orphan->disk_len;
3326                 if (rec->found_size == rec->nbytes)
3327                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3328
3329                 /* Update the file extent hole info too */
3330                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3331                                            orphan->disk_len);
3332                 if (ret < 0)
3333                         goto out;
3334                 if (RB_EMPTY_ROOT(&rec->holes))
3335                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3336
3337                 list_del(&orphan->list);
3338                 free(orphan);
3339         }
3340         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3341 out:
3342         return ret;
3343 }
3344
3345 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3346                                         struct btrfs_root *root,
3347                                         struct btrfs_path *path,
3348                                         struct inode_record *rec)
3349 {
3350         struct rb_node *node;
3351         struct file_extent_hole *hole;
3352         int found = 0;
3353         int ret = 0;
3354
3355         node = rb_first(&rec->holes);
3356
3357         while (node) {
3358                 found = 1;
3359                 hole = rb_entry(node, struct file_extent_hole, node);
3360                 ret = btrfs_punch_hole(trans, root, rec->ino,
3361                                        hole->start, hole->len);
3362                 if (ret < 0)
3363                         goto out;
3364                 ret = del_file_extent_hole(&rec->holes, hole->start,
3365                                            hole->len);
3366                 if (ret < 0)
3367                         goto out;
3368                 if (RB_EMPTY_ROOT(&rec->holes))
3369                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3370                 node = rb_first(&rec->holes);
3371         }
3372         /* special case for a file losing all its file extent */
3373         if (!found) {
3374                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3375                                        round_up(rec->isize,
3376                                                 root->fs_info->sectorsize));
3377                 if (ret < 0)
3378                         goto out;
3379         }
3380         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3381                rec->ino, root->objectid);
3382 out:
3383         return ret;
3384 }
3385
3386 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3387 {
3388         struct btrfs_trans_handle *trans;
3389         struct btrfs_path path;
3390         int ret = 0;
3391
3392         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3393                              I_ERR_NO_ORPHAN_ITEM |
3394                              I_ERR_LINK_COUNT_WRONG |
3395                              I_ERR_NO_INODE_ITEM |
3396                              I_ERR_FILE_EXTENT_ORPHAN |
3397                              I_ERR_FILE_EXTENT_DISCOUNT|
3398                              I_ERR_FILE_NBYTES_WRONG)))
3399                 return rec->errors;
3400
3401         /*
3402          * For nlink repair, it may create a dir and add link, so
3403          * 2 for parent(256)'s dir_index and dir_item
3404          * 2 for lost+found dir's inode_item and inode_ref
3405          * 1 for the new inode_ref of the file
3406          * 2 for lost+found dir's dir_index and dir_item for the file
3407          */
3408         trans = btrfs_start_transaction(root, 7);
3409         if (IS_ERR(trans))
3410                 return PTR_ERR(trans);
3411
3412         btrfs_init_path(&path);
3413         if (rec->errors & I_ERR_NO_INODE_ITEM)
3414                 ret = repair_inode_no_item(trans, root, &path, rec);
3415         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3416                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3417         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3418                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3419         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3420                 ret = repair_inode_isize(trans, root, &path, rec);
3421         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3422                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3423         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3424                 ret = repair_inode_nlinks(trans, root, &path, rec);
3425         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3426                 ret = repair_inode_nbytes(trans, root, &path, rec);
3427         btrfs_commit_transaction(trans, root);
3428         btrfs_release_path(&path);
3429         return ret;
3430 }
3431
3432 static int check_inode_recs(struct btrfs_root *root,
3433                             struct cache_tree *inode_cache)
3434 {
3435         struct cache_extent *cache;
3436         struct ptr_node *node;
3437         struct inode_record *rec;
3438         struct inode_backref *backref;
3439         int stage = 0;
3440         int ret = 0;
3441         int err = 0;
3442         u64 error = 0;
3443         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3444
3445         if (btrfs_root_refs(&root->root_item) == 0) {
3446                 if (!cache_tree_empty(inode_cache))
3447                         fprintf(stderr, "warning line %d\n", __LINE__);
3448                 return 0;
3449         }
3450
3451         /*
3452          * We need to repair backrefs first because we could change some of the
3453          * errors in the inode recs.
3454          *
3455          * We also need to go through and delete invalid backrefs first and then
3456          * add the correct ones second.  We do this because we may get EEXIST
3457          * when adding back the correct index because we hadn't yet deleted the
3458          * invalid index.
3459          *
3460          * For example, if we were missing a dir index then the directories
3461          * isize would be wrong, so if we fixed the isize to what we thought it
3462          * would be and then fixed the backref we'd still have a invalid fs, so
3463          * we need to add back the dir index and then check to see if the isize
3464          * is still wrong.
3465          */
3466         while (stage < 3) {
3467                 stage++;
3468                 if (stage == 3 && !err)
3469                         break;
3470
3471                 cache = search_cache_extent(inode_cache, 0);
3472                 while (repair && cache) {
3473                         node = container_of(cache, struct ptr_node, cache);
3474                         rec = node->data;
3475                         cache = next_cache_extent(cache);
3476
3477                         /* Need to free everything up and rescan */
3478                         if (stage == 3) {
3479                                 remove_cache_extent(inode_cache, &node->cache);
3480                                 free(node);
3481                                 free_inode_rec(rec);
3482                                 continue;
3483                         }
3484
3485                         if (list_empty(&rec->backrefs))
3486                                 continue;
3487
3488                         ret = repair_inode_backrefs(root, rec, inode_cache,
3489                                                     stage == 1);
3490                         if (ret < 0) {
3491                                 err = ret;
3492                                 stage = 2;
3493                                 break;
3494                         } if (ret > 0) {
3495                                 err = -EAGAIN;
3496                         }
3497                 }
3498         }
3499         if (err)
3500                 return err;
3501
3502         rec = get_inode_rec(inode_cache, root_dirid, 0);
3503         BUG_ON(IS_ERR(rec));
3504         if (rec) {
3505                 ret = check_root_dir(rec);
3506                 if (ret) {
3507                         fprintf(stderr, "root %llu root dir %llu error\n",
3508                                 (unsigned long long)root->root_key.objectid,
3509                                 (unsigned long long)root_dirid);
3510                         print_inode_error(root, rec);
3511                         error++;
3512                 }
3513         } else {
3514                 if (repair) {
3515                         struct btrfs_trans_handle *trans;
3516
3517                         trans = btrfs_start_transaction(root, 1);
3518                         if (IS_ERR(trans)) {
3519                                 err = PTR_ERR(trans);
3520                                 return err;
3521                         }
3522
3523                         fprintf(stderr,
3524                                 "root %llu missing its root dir, recreating\n",
3525                                 (unsigned long long)root->objectid);
3526
3527                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3528                         BUG_ON(ret);
3529
3530                         btrfs_commit_transaction(trans, root);
3531                         return -EAGAIN;
3532                 }
3533
3534                 fprintf(stderr, "root %llu root dir %llu not found\n",
3535                         (unsigned long long)root->root_key.objectid,
3536                         (unsigned long long)root_dirid);
3537         }
3538
3539         while (1) {
3540                 cache = search_cache_extent(inode_cache, 0);
3541                 if (!cache)
3542                         break;
3543                 node = container_of(cache, struct ptr_node, cache);
3544                 rec = node->data;
3545                 remove_cache_extent(inode_cache, &node->cache);
3546                 free(node);
3547                 if (rec->ino == root_dirid ||
3548                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3549                         free_inode_rec(rec);
3550                         continue;
3551                 }
3552
3553                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3554                         ret = check_orphan_item(root, rec->ino);
3555                         if (ret == 0)
3556                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3557                         if (can_free_inode_rec(rec)) {
3558                                 free_inode_rec(rec);
3559                                 continue;
3560                         }
3561                 }
3562
3563                 if (!rec->found_inode_item)
3564                         rec->errors |= I_ERR_NO_INODE_ITEM;
3565                 if (rec->found_link != rec->nlink)
3566                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3567                 if (repair) {
3568                         ret = try_repair_inode(root, rec);
3569                         if (ret == 0 && can_free_inode_rec(rec)) {
3570                                 free_inode_rec(rec);
3571                                 continue;
3572                         }
3573                         ret = 0;
3574                 }
3575
3576                 if (!(repair && ret == 0))
3577                         error++;
3578                 print_inode_error(root, rec);
3579                 list_for_each_entry(backref, &rec->backrefs, list) {
3580                         if (!backref->found_dir_item)
3581                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3582                         if (!backref->found_dir_index)
3583                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3584                         if (!backref->found_inode_ref)
3585                                 backref->errors |= REF_ERR_NO_INODE_REF;
3586                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3587                                 " namelen %u name %s filetype %d errors %x",
3588                                 (unsigned long long)backref->dir,
3589                                 (unsigned long long)backref->index,
3590                                 backref->namelen, backref->name,
3591                                 backref->filetype, backref->errors);
3592                         print_ref_error(backref->errors);
3593                 }
3594                 free_inode_rec(rec);
3595         }
3596         return (error > 0) ? -1 : 0;
3597 }
3598
3599 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3600                                         u64 objectid)
3601 {
3602         struct cache_extent *cache;
3603         struct root_record *rec = NULL;
3604         int ret;
3605
3606         cache = lookup_cache_extent(root_cache, objectid, 1);
3607         if (cache) {
3608                 rec = container_of(cache, struct root_record, cache);
3609         } else {
3610                 rec = calloc(1, sizeof(*rec));
3611                 if (!rec)
3612                         return ERR_PTR(-ENOMEM);
3613                 rec->objectid = objectid;
3614                 INIT_LIST_HEAD(&rec->backrefs);
3615                 rec->cache.start = objectid;
3616                 rec->cache.size = 1;
3617
3618                 ret = insert_cache_extent(root_cache, &rec->cache);
3619                 if (ret)
3620                         return ERR_PTR(-EEXIST);
3621         }
3622         return rec;
3623 }
3624
3625 static struct root_backref *get_root_backref(struct root_record *rec,
3626                                              u64 ref_root, u64 dir, u64 index,
3627                                              const char *name, int namelen)
3628 {
3629         struct root_backref *backref;
3630
3631         list_for_each_entry(backref, &rec->backrefs, list) {
3632                 if (backref->ref_root != ref_root || backref->dir != dir ||
3633                     backref->namelen != namelen)
3634                         continue;
3635                 if (memcmp(name, backref->name, namelen))
3636                         continue;
3637                 return backref;
3638         }
3639
3640         backref = calloc(1, sizeof(*backref) + namelen + 1);
3641         if (!backref)
3642                 return NULL;
3643         backref->ref_root = ref_root;
3644         backref->dir = dir;
3645         backref->index = index;
3646         backref->namelen = namelen;
3647         memcpy(backref->name, name, namelen);
3648         backref->name[namelen] = '\0';
3649         list_add_tail(&backref->list, &rec->backrefs);
3650         return backref;
3651 }
3652
3653 static void free_root_record(struct cache_extent *cache)
3654 {
3655         struct root_record *rec;
3656         struct root_backref *backref;
3657
3658         rec = container_of(cache, struct root_record, cache);
3659         while (!list_empty(&rec->backrefs)) {
3660                 backref = to_root_backref(rec->backrefs.next);
3661                 list_del(&backref->list);
3662                 free(backref);
3663         }
3664
3665         free(rec);
3666 }
3667
3668 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3669
3670 static int add_root_backref(struct cache_tree *root_cache,
3671                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3672                             const char *name, int namelen,
3673                             int item_type, int errors)
3674 {
3675         struct root_record *rec;
3676         struct root_backref *backref;
3677
3678         rec = get_root_rec(root_cache, root_id);
3679         BUG_ON(IS_ERR(rec));
3680         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3681         BUG_ON(!backref);
3682
3683         backref->errors |= errors;
3684
3685         if (item_type != BTRFS_DIR_ITEM_KEY) {
3686                 if (backref->found_dir_index || backref->found_back_ref ||
3687                     backref->found_forward_ref) {
3688                         if (backref->index != index)
3689                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3690                 } else {
3691                         backref->index = index;
3692                 }
3693         }
3694
3695         if (item_type == BTRFS_DIR_ITEM_KEY) {
3696                 if (backref->found_forward_ref)
3697                         rec->found_ref++;
3698                 backref->found_dir_item = 1;
3699         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3700                 backref->found_dir_index = 1;
3701         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3702                 if (backref->found_forward_ref)
3703                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3704                 else if (backref->found_dir_item)
3705                         rec->found_ref++;
3706                 backref->found_forward_ref = 1;
3707         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3708                 if (backref->found_back_ref)
3709                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3710                 backref->found_back_ref = 1;
3711         } else {
3712                 BUG_ON(1);
3713         }
3714
3715         if (backref->found_forward_ref && backref->found_dir_item)
3716                 backref->reachable = 1;
3717         return 0;
3718 }
3719
3720 static int merge_root_recs(struct btrfs_root *root,
3721                            struct cache_tree *src_cache,
3722                            struct cache_tree *dst_cache)
3723 {
3724         struct cache_extent *cache;
3725         struct ptr_node *node;
3726         struct inode_record *rec;
3727         struct inode_backref *backref;
3728         int ret = 0;
3729
3730         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3731                 free_inode_recs_tree(src_cache);
3732                 return 0;
3733         }
3734
3735         while (1) {
3736                 cache = search_cache_extent(src_cache, 0);
3737                 if (!cache)
3738                         break;
3739                 node = container_of(cache, struct ptr_node, cache);
3740                 rec = node->data;
3741                 remove_cache_extent(src_cache, &node->cache);
3742                 free(node);
3743
3744                 ret = is_child_root(root, root->objectid, rec->ino);
3745                 if (ret < 0)
3746                         break;
3747                 else if (ret == 0)
3748                         goto skip;
3749
3750                 list_for_each_entry(backref, &rec->backrefs, list) {
3751                         BUG_ON(backref->found_inode_ref);
3752                         if (backref->found_dir_item)
3753                                 add_root_backref(dst_cache, rec->ino,
3754                                         root->root_key.objectid, backref->dir,
3755                                         backref->index, backref->name,
3756                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3757                                         backref->errors);
3758                         if (backref->found_dir_index)
3759                                 add_root_backref(dst_cache, rec->ino,
3760                                         root->root_key.objectid, backref->dir,
3761                                         backref->index, backref->name,
3762                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3763                                         backref->errors);
3764                 }
3765 skip:
3766                 free_inode_rec(rec);
3767         }
3768         if (ret < 0)
3769                 return ret;
3770         return 0;
3771 }
3772
3773 static int check_root_refs(struct btrfs_root *root,
3774                            struct cache_tree *root_cache)
3775 {
3776         struct root_record *rec;
3777         struct root_record *ref_root;
3778         struct root_backref *backref;
3779         struct cache_extent *cache;
3780         int loop = 1;
3781         int ret;
3782         int error;
3783         int errors = 0;
3784
3785         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3786         BUG_ON(IS_ERR(rec));
3787         rec->found_ref = 1;
3788
3789         /* fixme: this can not detect circular references */
3790         while (loop) {
3791                 loop = 0;
3792                 cache = search_cache_extent(root_cache, 0);
3793                 while (1) {
3794                         if (!cache)
3795                                 break;
3796                         rec = container_of(cache, struct root_record, cache);
3797                         cache = next_cache_extent(cache);
3798
3799                         if (rec->found_ref == 0)
3800                                 continue;
3801
3802                         list_for_each_entry(backref, &rec->backrefs, list) {
3803                                 if (!backref->reachable)
3804                                         continue;
3805
3806                                 ref_root = get_root_rec(root_cache,
3807                                                         backref->ref_root);
3808                                 BUG_ON(IS_ERR(ref_root));
3809                                 if (ref_root->found_ref > 0)
3810                                         continue;
3811
3812                                 backref->reachable = 0;
3813                                 rec->found_ref--;
3814                                 if (rec->found_ref == 0)
3815                                         loop = 1;
3816                         }
3817                 }
3818         }
3819
3820         cache = search_cache_extent(root_cache, 0);
3821         while (1) {
3822                 if (!cache)
3823                         break;
3824                 rec = container_of(cache, struct root_record, cache);
3825                 cache = next_cache_extent(cache);
3826
3827                 if (rec->found_ref == 0 &&
3828                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3829                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3830                         ret = check_orphan_item(root->fs_info->tree_root,
3831                                                 rec->objectid);
3832                         if (ret == 0)
3833                                 continue;
3834
3835                         /*
3836                          * If we don't have a root item then we likely just have
3837                          * a dir item in a snapshot for this root but no actual
3838                          * ref key or anything so it's meaningless.
3839                          */
3840                         if (!rec->found_root_item)
3841                                 continue;
3842                         errors++;
3843                         fprintf(stderr, "fs tree %llu not referenced\n",
3844                                 (unsigned long long)rec->objectid);
3845                 }
3846
3847                 error = 0;
3848                 if (rec->found_ref > 0 && !rec->found_root_item)
3849                         error = 1;
3850                 list_for_each_entry(backref, &rec->backrefs, list) {
3851                         if (!backref->found_dir_item)
3852                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3853                         if (!backref->found_dir_index)
3854                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3855                         if (!backref->found_back_ref)
3856                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3857                         if (!backref->found_forward_ref)
3858                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3859                         if (backref->reachable && backref->errors)
3860                                 error = 1;
3861                 }
3862                 if (!error)
3863                         continue;
3864
3865                 errors++;
3866                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3867                         (unsigned long long)rec->objectid, rec->found_ref,
3868                          rec->found_root_item ? "" : "not found");
3869
3870                 list_for_each_entry(backref, &rec->backrefs, list) {
3871                         if (!backref->reachable)
3872                                 continue;
3873                         if (!backref->errors && rec->found_root_item)
3874                                 continue;
3875                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3876                                 " index %llu namelen %u name %s errors %x\n",
3877                                 (unsigned long long)backref->ref_root,
3878                                 (unsigned long long)backref->dir,
3879                                 (unsigned long long)backref->index,
3880                                 backref->namelen, backref->name,
3881                                 backref->errors);
3882                         print_ref_error(backref->errors);
3883                 }
3884         }
3885         return errors > 0 ? 1 : 0;
3886 }
3887
3888 static int process_root_ref(struct extent_buffer *eb, int slot,
3889                             struct btrfs_key *key,
3890                             struct cache_tree *root_cache)
3891 {
3892         u64 dirid;
3893         u64 index;
3894         u32 len;
3895         u32 name_len;
3896         struct btrfs_root_ref *ref;
3897         char namebuf[BTRFS_NAME_LEN];
3898         int error;
3899
3900         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3901
3902         dirid = btrfs_root_ref_dirid(eb, ref);
3903         index = btrfs_root_ref_sequence(eb, ref);
3904         name_len = btrfs_root_ref_name_len(eb, ref);
3905
3906         if (name_len <= BTRFS_NAME_LEN) {
3907                 len = name_len;
3908                 error = 0;
3909         } else {
3910                 len = BTRFS_NAME_LEN;
3911                 error = REF_ERR_NAME_TOO_LONG;
3912         }
3913         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3914
3915         if (key->type == BTRFS_ROOT_REF_KEY) {
3916                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3917                                  index, namebuf, len, key->type, error);
3918         } else {
3919                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3920                                  index, namebuf, len, key->type, error);
3921         }
3922         return 0;
3923 }
3924
3925 static void free_corrupt_block(struct cache_extent *cache)
3926 {
3927         struct btrfs_corrupt_block *corrupt;
3928
3929         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3930         free(corrupt);
3931 }
3932
3933 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3934
3935 /*
3936  * Repair the btree of the given root.
3937  *
3938  * The fix is to remove the node key in corrupt_blocks cache_tree.
3939  * and rebalance the tree.
3940  * After the fix, the btree should be writeable.
3941  */
3942 static int repair_btree(struct btrfs_root *root,
3943                         struct cache_tree *corrupt_blocks)
3944 {
3945         struct btrfs_trans_handle *trans;
3946         struct btrfs_path path;
3947         struct btrfs_corrupt_block *corrupt;
3948         struct cache_extent *cache;
3949         struct btrfs_key key;
3950         u64 offset;
3951         int level;
3952         int ret = 0;
3953
3954         if (cache_tree_empty(corrupt_blocks))
3955                 return 0;
3956
3957         trans = btrfs_start_transaction(root, 1);
3958         if (IS_ERR(trans)) {
3959                 ret = PTR_ERR(trans);
3960                 fprintf(stderr, "Error starting transaction: %s\n",
3961                         strerror(-ret));
3962                 return ret;
3963         }
3964         btrfs_init_path(&path);
3965         cache = first_cache_extent(corrupt_blocks);
3966         while (cache) {
3967                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3968                                        cache);
3969                 level = corrupt->level;
3970                 path.lowest_level = level;
3971                 key.objectid = corrupt->key.objectid;
3972                 key.type = corrupt->key.type;
3973                 key.offset = corrupt->key.offset;
3974
3975                 /*
3976                  * Here we don't want to do any tree balance, since it may
3977                  * cause a balance with corrupted brother leaf/node,
3978                  * so ins_len set to 0 here.
3979                  * Balance will be done after all corrupt node/leaf is deleted.
3980                  */
3981                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3982                 if (ret < 0)
3983                         goto out;
3984                 offset = btrfs_node_blockptr(path.nodes[level],
3985                                              path.slots[level]);
3986
3987                 /* Remove the ptr */
3988                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3989                 if (ret < 0)
3990                         goto out;
3991                 /*
3992                  * Remove the corresponding extent
3993                  * return value is not concerned.
3994                  */
3995                 btrfs_release_path(&path);
3996                 ret = btrfs_free_extent(trans, root, offset,
3997                                 root->fs_info->nodesize, 0,
3998                                 root->root_key.objectid, level - 1, 0);
3999                 cache = next_cache_extent(cache);
4000         }
4001
4002         /* Balance the btree using btrfs_search_slot() */
4003         cache = first_cache_extent(corrupt_blocks);
4004         while (cache) {
4005                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4006                                        cache);
4007                 memcpy(&key, &corrupt->key, sizeof(key));
4008                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4009                 if (ret < 0)
4010                         goto out;
4011                 /* return will always >0 since it won't find the item */
4012                 ret = 0;
4013                 btrfs_release_path(&path);
4014                 cache = next_cache_extent(cache);
4015         }
4016 out:
4017         btrfs_commit_transaction(trans, root);
4018         btrfs_release_path(&path);
4019         return ret;
4020 }
4021
4022 static int check_fs_root(struct btrfs_root *root,
4023                          struct cache_tree *root_cache,
4024                          struct walk_control *wc)
4025 {
4026         int ret = 0;
4027         int err = 0;
4028         int wret;
4029         int level;
4030         struct btrfs_path path;
4031         struct shared_node root_node;
4032         struct root_record *rec;
4033         struct btrfs_root_item *root_item = &root->root_item;
4034         struct cache_tree corrupt_blocks;
4035         struct orphan_data_extent *orphan;
4036         struct orphan_data_extent *tmp;
4037         enum btrfs_tree_block_status status;
4038         struct node_refs nrefs;
4039
4040         /*
4041          * Reuse the corrupt_block cache tree to record corrupted tree block
4042          *
4043          * Unlike the usage in extent tree check, here we do it in a per
4044          * fs/subvol tree base.
4045          */
4046         cache_tree_init(&corrupt_blocks);
4047         root->fs_info->corrupt_blocks = &corrupt_blocks;
4048
4049         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4050                 rec = get_root_rec(root_cache, root->root_key.objectid);
4051                 BUG_ON(IS_ERR(rec));
4052                 if (btrfs_root_refs(root_item) > 0)
4053                         rec->found_root_item = 1;
4054         }
4055
4056         btrfs_init_path(&path);
4057         memset(&root_node, 0, sizeof(root_node));
4058         cache_tree_init(&root_node.root_cache);
4059         cache_tree_init(&root_node.inode_cache);
4060         memset(&nrefs, 0, sizeof(nrefs));
4061
4062         /* Move the orphan extent record to corresponding inode_record */
4063         list_for_each_entry_safe(orphan, tmp,
4064                                  &root->orphan_data_extents, list) {
4065                 struct inode_record *inode;
4066
4067                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4068                                       1);
4069                 BUG_ON(IS_ERR(inode));
4070                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4071                 list_move(&orphan->list, &inode->orphan_extents);
4072         }
4073
4074         level = btrfs_header_level(root->node);
4075         memset(wc->nodes, 0, sizeof(wc->nodes));
4076         wc->nodes[level] = &root_node;
4077         wc->active_node = level;
4078         wc->root_level = level;
4079
4080         /* We may not have checked the root block, lets do that now */
4081         if (btrfs_is_leaf(root->node))
4082                 status = btrfs_check_leaf(root, NULL, root->node);
4083         else
4084                 status = btrfs_check_node(root, NULL, root->node);
4085         if (status != BTRFS_TREE_BLOCK_CLEAN)
4086                 return -EIO;
4087
4088         if (btrfs_root_refs(root_item) > 0 ||
4089             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4090                 path.nodes[level] = root->node;
4091                 extent_buffer_get(root->node);
4092                 path.slots[level] = 0;
4093         } else {
4094                 struct btrfs_key key;
4095                 struct btrfs_disk_key found_key;
4096
4097                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4098                 level = root_item->drop_level;
4099                 path.lowest_level = level;
4100                 if (level > btrfs_header_level(root->node) ||
4101                     level >= BTRFS_MAX_LEVEL) {
4102                         error("ignoring invalid drop level: %u", level);
4103                         goto skip_walking;
4104                 }
4105                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4106                 if (wret < 0)
4107                         goto skip_walking;
4108                 btrfs_node_key(path.nodes[level], &found_key,
4109                                 path.slots[level]);
4110                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4111                                         sizeof(found_key)));
4112         }
4113
4114         while (1) {
4115                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4116                 if (wret < 0)
4117                         ret = wret;
4118                 if (wret != 0)
4119                         break;
4120
4121                 wret = walk_up_tree(root, &path, wc, &level);
4122                 if (wret < 0)
4123                         ret = wret;
4124                 if (wret != 0)
4125                         break;
4126         }
4127 skip_walking:
4128         btrfs_release_path(&path);
4129
4130         if (!cache_tree_empty(&corrupt_blocks)) {
4131                 struct cache_extent *cache;
4132                 struct btrfs_corrupt_block *corrupt;
4133
4134                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4135                        root->root_key.objectid);
4136                 cache = first_cache_extent(&corrupt_blocks);
4137                 while (cache) {
4138                         corrupt = container_of(cache,
4139                                                struct btrfs_corrupt_block,
4140                                                cache);
4141                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4142                                cache->start, corrupt->level,
4143                                corrupt->key.objectid, corrupt->key.type,
4144                                corrupt->key.offset);
4145                         cache = next_cache_extent(cache);
4146                 }
4147                 if (repair) {
4148                         printf("Try to repair the btree for root %llu\n",
4149                                root->root_key.objectid);
4150                         ret = repair_btree(root, &corrupt_blocks);
4151                         if (ret < 0)
4152                                 fprintf(stderr, "Failed to repair btree: %s\n",
4153                                         strerror(-ret));
4154                         if (!ret)
4155                                 printf("Btree for root %llu is fixed\n",
4156                                        root->root_key.objectid);
4157                 }
4158         }
4159
4160         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4161         if (err < 0)
4162                 ret = err;
4163
4164         if (root_node.current) {
4165                 root_node.current->checked = 1;
4166                 maybe_free_inode_rec(&root_node.inode_cache,
4167                                 root_node.current);
4168         }
4169
4170         err = check_inode_recs(root, &root_node.inode_cache);
4171         if (!ret)
4172                 ret = err;
4173
4174         free_corrupt_blocks_tree(&corrupt_blocks);
4175         root->fs_info->corrupt_blocks = NULL;
4176         free_orphan_data_extents(&root->orphan_data_extents);
4177         return ret;
4178 }
4179
4180 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4181                           struct cache_tree *root_cache)
4182 {
4183         struct btrfs_path path;
4184         struct btrfs_key key;
4185         struct walk_control wc;
4186         struct extent_buffer *leaf, *tree_node;
4187         struct btrfs_root *tmp_root;
4188         struct btrfs_root *tree_root = fs_info->tree_root;
4189         int ret;
4190         int err = 0;
4191
4192         if (ctx.progress_enabled) {
4193                 ctx.tp = TASK_FS_ROOTS;
4194                 task_start(ctx.info);
4195         }
4196
4197         /*
4198          * Just in case we made any changes to the extent tree that weren't
4199          * reflected into the free space cache yet.
4200          */
4201         if (repair)
4202                 reset_cached_block_groups(fs_info);
4203         memset(&wc, 0, sizeof(wc));
4204         cache_tree_init(&wc.shared);
4205         btrfs_init_path(&path);
4206
4207 again:
4208         key.offset = 0;
4209         key.objectid = 0;
4210         key.type = BTRFS_ROOT_ITEM_KEY;
4211         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4212         if (ret < 0) {
4213                 err = 1;
4214                 goto out;
4215         }
4216         tree_node = tree_root->node;
4217         while (1) {
4218                 if (tree_node != tree_root->node) {
4219                         free_root_recs_tree(root_cache);
4220                         btrfs_release_path(&path);
4221                         goto again;
4222                 }
4223                 leaf = path.nodes[0];
4224                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4225                         ret = btrfs_next_leaf(tree_root, &path);
4226                         if (ret) {
4227                                 if (ret < 0)
4228                                         err = 1;
4229                                 break;
4230                         }
4231                         leaf = path.nodes[0];
4232                 }
4233                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4234                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4235                     fs_root_objectid(key.objectid)) {
4236                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4237                                 tmp_root = btrfs_read_fs_root_no_cache(
4238                                                 fs_info, &key);
4239                         } else {
4240                                 key.offset = (u64)-1;
4241                                 tmp_root = btrfs_read_fs_root(
4242                                                 fs_info, &key);
4243                         }
4244                         if (IS_ERR(tmp_root)) {
4245                                 err = 1;
4246                                 goto next;
4247                         }
4248                         ret = check_fs_root(tmp_root, root_cache, &wc);
4249                         if (ret == -EAGAIN) {
4250                                 free_root_recs_tree(root_cache);
4251                                 btrfs_release_path(&path);
4252                                 goto again;
4253                         }
4254                         if (ret)
4255                                 err = 1;
4256                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4257                                 btrfs_free_fs_root(tmp_root);
4258                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4259                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4260                         process_root_ref(leaf, path.slots[0], &key,
4261                                          root_cache);
4262                 }
4263 next:
4264                 path.slots[0]++;
4265         }
4266 out:
4267         btrfs_release_path(&path);
4268         if (err)
4269                 free_extent_cache_tree(&wc.shared);
4270         if (!cache_tree_empty(&wc.shared))
4271                 fprintf(stderr, "warning line %d\n", __LINE__);
4272
4273         task_stop(ctx.info);
4274
4275         return err;
4276 }
4277
4278 /*
4279  * Find the @index according by @ino and name.
4280  * Notice:time efficiency is O(N)
4281  *
4282  * @root:       the root of the fs/file tree
4283  * @index_ret:  the index as return value
4284  * @namebuf:    the name to match
4285  * @name_len:   the length of name to match
4286  * @file_type:  the file_type of INODE_ITEM to match
4287  *
4288  * Returns 0 if found and *@index_ret will be modified with right value
4289  * Returns< 0 not found and *@index_ret will be (u64)-1
4290  */
4291 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4292                           u64 *index_ret, char *namebuf, u32 name_len,
4293                           u8 file_type)
4294 {
4295         struct btrfs_path path;
4296         struct extent_buffer *node;
4297         struct btrfs_dir_item *di;
4298         struct btrfs_key key;
4299         struct btrfs_key location;
4300         char name[BTRFS_NAME_LEN] = {0};
4301
4302         u32 total;
4303         u32 cur = 0;
4304         u32 len;
4305         u32 data_len;
4306         u8 filetype;
4307         int slot;
4308         int ret;
4309
4310         ASSERT(index_ret);
4311
4312         /* search from the last index */
4313         key.objectid = dirid;
4314         key.offset = (u64)-1;
4315         key.type = BTRFS_DIR_INDEX_KEY;
4316
4317         btrfs_init_path(&path);
4318         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4319         if (ret < 0)
4320                 return ret;
4321
4322 loop:
4323         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4324         if (ret) {
4325                 ret = -ENOENT;
4326                 *index_ret = (64)-1;
4327                 goto out;
4328         }
4329         /* Check whether inode_id/filetype/name match */
4330         node = path.nodes[0];
4331         slot = path.slots[0];
4332         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4333         total = btrfs_item_size_nr(node, slot);
4334         while (cur < total) {
4335                 ret = -ENOENT;
4336                 len = btrfs_dir_name_len(node, di);
4337                 data_len = btrfs_dir_data_len(node, di);
4338
4339                 btrfs_dir_item_key_to_cpu(node, di, &location);
4340                 if (location.objectid != location_id ||
4341                     location.type != BTRFS_INODE_ITEM_KEY ||
4342                     location.offset != 0)
4343                         goto next;
4344
4345                 filetype = btrfs_dir_type(node, di);
4346                 if (file_type != filetype)
4347                         goto next;
4348
4349                 if (len > BTRFS_NAME_LEN)
4350                         len = BTRFS_NAME_LEN;
4351
4352                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4353                 if (len != name_len || strncmp(namebuf, name, len))
4354                         goto next;
4355
4356                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4357                 *index_ret = key.offset;
4358                 ret = 0;
4359                 goto out;
4360 next:
4361                 len += sizeof(*di) + data_len;
4362                 di = (struct btrfs_dir_item *)((char *)di + len);
4363                 cur += len;
4364         }
4365         goto loop;
4366
4367 out:
4368         btrfs_release_path(&path);
4369         return ret;
4370 }
4371
4372 /*
4373  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4374  * INODE_REF/INODE_EXTREF match.
4375  *
4376  * @root:       the root of the fs/file tree
4377  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4378  *              value while find index
4379  * @location_key: location key of the struct btrfs_dir_item to match
4380  * @name:       the name to match
4381  * @namelen:    the length of name
4382  * @file_type:  the type of file to math
4383  *
4384  * Return 0 if no error occurred.
4385  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4386  * DIR_ITEM/DIR_INDEX
4387  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4388  * and DIR_ITEM/DIR_INDEX mismatch
4389  */
4390 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4391                          struct btrfs_key *location_key, char *name,
4392                          u32 namelen, u8 file_type)
4393 {
4394         struct btrfs_path path;
4395         struct extent_buffer *node;
4396         struct btrfs_dir_item *di;
4397         struct btrfs_key location;
4398         char namebuf[BTRFS_NAME_LEN] = {0};
4399         u32 total;
4400         u32 cur = 0;
4401         u32 len;
4402         u32 data_len;
4403         u8 filetype;
4404         int slot;
4405         int ret;
4406
4407         /* get the index by traversing all index */
4408         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4409                 ret = find_dir_index(root, key->objectid,
4410                                      location_key->objectid, &key->offset,
4411                                      name, namelen, file_type);
4412                 if (ret)
4413                         ret = DIR_INDEX_MISSING;
4414                 return ret;
4415         }
4416
4417         btrfs_init_path(&path);
4418         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4419         if (ret) {
4420                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4421                         DIR_INDEX_MISSING;
4422                 goto out;
4423         }
4424
4425         /* Check whether inode_id/filetype/name match */
4426         node = path.nodes[0];
4427         slot = path.slots[0];
4428         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4429         total = btrfs_item_size_nr(node, slot);
4430         while (cur < total) {
4431                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4432                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4433
4434                 len = btrfs_dir_name_len(node, di);
4435                 data_len = btrfs_dir_data_len(node, di);
4436
4437                 btrfs_dir_item_key_to_cpu(node, di, &location);
4438                 if (location.objectid != location_key->objectid ||
4439                     location.type != location_key->type ||
4440                     location.offset != location_key->offset)
4441                         goto next;
4442
4443                 filetype = btrfs_dir_type(node, di);
4444                 if (file_type != filetype)
4445                         goto next;
4446
4447                 if (len > BTRFS_NAME_LEN) {
4448                         len = BTRFS_NAME_LEN;
4449                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4450                         root->objectid,
4451                         key->type == BTRFS_DIR_ITEM_KEY ?
4452                         "DIR_ITEM" : "DIR_INDEX",
4453                         key->objectid, key->offset, len);
4454                 }
4455                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4456                                    len);
4457                 if (len != namelen || strncmp(namebuf, name, len))
4458                         goto next;
4459
4460                 ret = 0;
4461                 goto out;
4462 next:
4463                 len += sizeof(*di) + data_len;
4464                 di = (struct btrfs_dir_item *)((char *)di + len);
4465                 cur += len;
4466         }
4467
4468 out:
4469         btrfs_release_path(&path);
4470         return ret;
4471 }
4472
4473 /*
4474  * Prints inode ref error message
4475  */
4476 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4477                                 u64 index, const char *namebuf, int name_len,
4478                                 u8 filetype, int err)
4479 {
4480         if (!err)
4481                 return;
4482
4483         /* root dir error */
4484         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4485                 error(
4486         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4487                       root->objectid, key->objectid, key->offset, namebuf);
4488                 return;
4489         }
4490
4491         /* normal error */
4492         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4493                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4494                       root->objectid, key->offset,
4495                       btrfs_name_hash(namebuf, name_len),
4496                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4497                       namebuf, filetype);
4498         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4499                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4500                       root->objectid, key->offset, index,
4501                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4502                       namebuf, filetype);
4503 }
4504
4505 /*
4506  * Insert the missing inode item.
4507  *
4508  * Returns 0 means success.
4509  * Returns <0 means error.
4510  */
4511 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4512                                      u8 filetype)
4513 {
4514         struct btrfs_key key;
4515         struct btrfs_trans_handle *trans;
4516         struct btrfs_path path;
4517         int ret;
4518
4519         key.objectid = ino;
4520         key.type = BTRFS_INODE_ITEM_KEY;
4521         key.offset = 0;
4522
4523         btrfs_init_path(&path);
4524         trans = btrfs_start_transaction(root, 1);
4525         if (IS_ERR(trans)) {
4526                 ret = -EIO;
4527                 goto out;
4528         }
4529
4530         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4531         if (ret < 0 || !ret)
4532                 goto fail;
4533
4534         /* insert inode item */
4535         create_inode_item_lowmem(trans, root, ino, filetype);
4536         ret = 0;
4537 fail:
4538         btrfs_commit_transaction(trans, root);
4539 out:
4540         if (ret)
4541                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4542                       root->objectid, ino);
4543         btrfs_release_path(&path);
4544         return ret;
4545 }
4546
4547 /*
4548  * The ternary means dir item, dir index and relative inode ref.
4549  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4550  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4551  * strategy:
4552  * If two of three is missing or mismatched, delete the existing one.
4553  * If one of three is missing or mismatched, add the missing one.
4554  *
4555  * returns 0 means success.
4556  * returns not 0 means on error;
4557  */
4558 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4559                           u64 index, char *name, int name_len, u8 filetype,
4560                           int err)
4561 {
4562         struct btrfs_trans_handle *trans;
4563         int stage = 0;
4564         int ret = 0;
4565
4566         /*
4567          * stage shall be one of following valild values:
4568          *      0: Fine, nothing to do.
4569          *      1: One of three is wrong, so add missing one.
4570          *      2: Two of three is wrong, so delete existed one.
4571          */
4572         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4573                 stage++;
4574         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4575                 stage++;
4576         if (err & (INODE_REF_MISSING))
4577                 stage++;
4578
4579         /* stage must be smllarer than 3 */
4580         ASSERT(stage < 3);
4581
4582         trans = btrfs_start_transaction(root, 1);
4583         if (stage == 2) {
4584                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4585                                    name_len, 0);
4586                 goto out;
4587         }
4588         if (stage == 1) {
4589                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4590                                filetype, &index, 1, 1);
4591                 goto out;
4592         }
4593 out:
4594         btrfs_commit_transaction(trans, root);
4595
4596         if (ret)
4597                 error("fail to repair inode %llu name %s filetype %u",
4598                       ino, name, filetype);
4599         else
4600                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4601                        stage == 2 ? "Delete" : "Add",
4602                        ino, name, filetype);
4603
4604         return ret;
4605 }
4606
4607 /*
4608  * Traverse the given INODE_REF and call find_dir_item() to find related
4609  * DIR_ITEM/DIR_INDEX.
4610  *
4611  * @root:       the root of the fs/file tree
4612  * @ref_key:    the key of the INODE_REF
4613  * @path        the path provides node and slot
4614  * @refs:       the count of INODE_REF
4615  * @mode:       the st_mode of INODE_ITEM
4616  * @name_ret:   returns with the first ref's name
4617  * @name_len_ret:    len of the name_ret
4618  *
4619  * Return 0 if no error occurred.
4620  */
4621 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4622                            struct btrfs_path *path, char *name_ret,
4623                            u32 *namelen_ret, u64 *refs_ret, int mode)
4624 {
4625         struct btrfs_key key;
4626         struct btrfs_key location;
4627         struct btrfs_inode_ref *ref;
4628         struct extent_buffer *node;
4629         char namebuf[BTRFS_NAME_LEN] = {0};
4630         u32 total;
4631         u32 cur = 0;
4632         u32 len;
4633         u32 name_len;
4634         u64 index;
4635         int ret;
4636         int err = 0;
4637         int tmp_err;
4638         int slot;
4639         int need_research = 0;
4640         u64 refs;
4641
4642 begin:
4643         err = 0;
4644         cur = 0;
4645         refs = *refs_ret;
4646
4647         /* since after repair, path and the dir item may be changed */
4648         if (need_research) {
4649                 need_research = 0;
4650                 btrfs_release_path(path);
4651                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4652                 /* the item was deleted, let path point to the last checked item */
4653                 if (ret > 0) {
4654                         if (path->slots[0] == 0)
4655                                 btrfs_prev_leaf(root, path);
4656                         else
4657                                 path->slots[0]--;
4658                 }
4659                 if (ret)
4660                         goto out;
4661         }
4662
4663         location.objectid = ref_key->objectid;
4664         location.type = BTRFS_INODE_ITEM_KEY;
4665         location.offset = 0;
4666         node = path->nodes[0];
4667         slot = path->slots[0];
4668
4669         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4670         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4671         total = btrfs_item_size_nr(node, slot);
4672
4673 next:
4674         /* Update inode ref count */
4675         refs++;
4676         tmp_err = 0;
4677         index = btrfs_inode_ref_index(node, ref);
4678         name_len = btrfs_inode_ref_name_len(node, ref);
4679
4680         if (name_len <= BTRFS_NAME_LEN) {
4681                 len = name_len;
4682         } else {
4683                 len = BTRFS_NAME_LEN;
4684                 warning("root %llu INODE_REF[%llu %llu] name too long",
4685                         root->objectid, ref_key->objectid, ref_key->offset);
4686         }
4687
4688         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4689
4690         /* copy the first name found to name_ret */
4691         if (refs == 1 && name_ret) {
4692                 memcpy(name_ret, namebuf, len);
4693                 *namelen_ret = len;
4694         }
4695
4696         /* Check root dir ref */
4697         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4698                 if (index != 0 || len != strlen("..") ||
4699                     strncmp("..", namebuf, len) ||
4700                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4701                         /* set err bits then repair will delete the ref */
4702                         err |= DIR_INDEX_MISSING;
4703                         err |= DIR_ITEM_MISSING;
4704                 }
4705                 goto end;
4706         }
4707
4708         /* Find related DIR_INDEX */
4709         key.objectid = ref_key->offset;
4710         key.type = BTRFS_DIR_INDEX_KEY;
4711         key.offset = index;
4712         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4713                             imode_to_type(mode));
4714
4715         /* Find related dir_item */
4716         key.objectid = ref_key->offset;
4717         key.type = BTRFS_DIR_ITEM_KEY;
4718         key.offset = btrfs_name_hash(namebuf, len);
4719         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4720                             imode_to_type(mode));
4721 end:
4722         if (tmp_err && repair) {
4723                 ret = repair_ternary_lowmem(root, ref_key->offset,
4724                                             ref_key->objectid, index, namebuf,
4725                                             name_len, imode_to_type(mode),
4726                                             tmp_err);
4727                 if (!ret) {
4728                         need_research = 1;
4729                         goto begin;
4730                 }
4731         }
4732         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4733                             imode_to_type(mode), tmp_err);
4734         err |= tmp_err;
4735         len = sizeof(*ref) + name_len;
4736         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4737         cur += len;
4738         if (cur < total)
4739                 goto next;
4740
4741 out:
4742         *refs_ret = refs;
4743         return err;
4744 }
4745
4746 /*
4747  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4748  * DIR_ITEM/DIR_INDEX.
4749  *
4750  * @root:       the root of the fs/file tree
4751  * @ref_key:    the key of the INODE_EXTREF
4752  * @refs:       the count of INODE_EXTREF
4753  * @mode:       the st_mode of INODE_ITEM
4754  *
4755  * Return 0 if no error occurred.
4756  */
4757 static int check_inode_extref(struct btrfs_root *root,
4758                               struct btrfs_key *ref_key,
4759                               struct extent_buffer *node, int slot, u64 *refs,
4760                               int mode)
4761 {
4762         struct btrfs_key key;
4763         struct btrfs_key location;
4764         struct btrfs_inode_extref *extref;
4765         char namebuf[BTRFS_NAME_LEN] = {0};
4766         u32 total;
4767         u32 cur = 0;
4768         u32 len;
4769         u32 name_len;
4770         u64 index;
4771         u64 parent;
4772         int ret;
4773         int err = 0;
4774
4775         location.objectid = ref_key->objectid;
4776         location.type = BTRFS_INODE_ITEM_KEY;
4777         location.offset = 0;
4778
4779         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4780         total = btrfs_item_size_nr(node, slot);
4781
4782 next:
4783         /* update inode ref count */
4784         (*refs)++;
4785         name_len = btrfs_inode_extref_name_len(node, extref);
4786         index = btrfs_inode_extref_index(node, extref);
4787         parent = btrfs_inode_extref_parent(node, extref);
4788         if (name_len <= BTRFS_NAME_LEN) {
4789                 len = name_len;
4790         } else {
4791                 len = BTRFS_NAME_LEN;
4792                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4793                         root->objectid, ref_key->objectid, ref_key->offset);
4794         }
4795         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4796
4797         /* Check root dir ref name */
4798         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4799                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4800                       root->objectid, ref_key->objectid, ref_key->offset,
4801                       namebuf);
4802                 err |= ROOT_DIR_ERROR;
4803         }
4804
4805         /* find related dir_index */
4806         key.objectid = parent;
4807         key.type = BTRFS_DIR_INDEX_KEY;
4808         key.offset = index;
4809         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4810         err |= ret;
4811
4812         /* find related dir_item */
4813         key.objectid = parent;
4814         key.type = BTRFS_DIR_ITEM_KEY;
4815         key.offset = btrfs_name_hash(namebuf, len);
4816         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4817         err |= ret;
4818
4819         len = sizeof(*extref) + name_len;
4820         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4821         cur += len;
4822
4823         if (cur < total)
4824                 goto next;
4825
4826         return err;
4827 }
4828
4829 /*
4830  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4831  * DIR_ITEM/DIR_INDEX match.
4832  * Return with @index_ret.
4833  *
4834  * @root:       the root of the fs/file tree
4835  * @key:        the key of the INODE_REF/INODE_EXTREF
4836  * @name:       the name in the INODE_REF/INODE_EXTREF
4837  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4838  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4839  *              value (64)-1 means do not check index
4840  * @ext_ref:    the EXTENDED_IREF feature
4841  *
4842  * Return 0 if no error occurred.
4843  * Return >0 for error bitmap
4844  */
4845 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4846                           char *name, int namelen, u64 *index_ret,
4847                           unsigned int ext_ref)
4848 {
4849         struct btrfs_path path;
4850         struct btrfs_inode_ref *ref;
4851         struct btrfs_inode_extref *extref;
4852         struct extent_buffer *node;
4853         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4854         u32 total;
4855         u32 cur = 0;
4856         u32 len;
4857         u32 ref_namelen;
4858         u64 ref_index;
4859         u64 parent;
4860         u64 dir_id;
4861         int slot;
4862         int ret;
4863
4864         ASSERT(index_ret);
4865
4866         btrfs_init_path(&path);
4867         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4868         if (ret) {
4869                 ret = INODE_REF_MISSING;
4870                 goto extref;
4871         }
4872
4873         node = path.nodes[0];
4874         slot = path.slots[0];
4875
4876         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4877         total = btrfs_item_size_nr(node, slot);
4878
4879         /* Iterate all entry of INODE_REF */
4880         while (cur < total) {
4881                 ret = INODE_REF_MISSING;
4882
4883                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4884                 ref_index = btrfs_inode_ref_index(node, ref);
4885                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4886                         goto next_ref;
4887
4888                 if (cur + sizeof(*ref) + ref_namelen > total ||
4889                     ref_namelen > BTRFS_NAME_LEN) {
4890                         warning("root %llu INODE %s[%llu %llu] name too long",
4891                                 root->objectid,
4892                                 key->type == BTRFS_INODE_REF_KEY ?
4893                                         "REF" : "EXTREF",
4894                                 key->objectid, key->offset);
4895
4896                         if (cur + sizeof(*ref) > total)
4897                                 break;
4898                         len = min_t(u32, total - cur - sizeof(*ref),
4899                                     BTRFS_NAME_LEN);
4900                 } else {
4901                         len = ref_namelen;
4902                 }
4903
4904                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4905                                    len);
4906
4907                 if (len != namelen || strncmp(ref_namebuf, name, len))
4908                         goto next_ref;
4909
4910                 *index_ret = ref_index;
4911                 ret = 0;
4912                 goto out;
4913 next_ref:
4914                 len = sizeof(*ref) + ref_namelen;
4915                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4916                 cur += len;
4917         }
4918
4919 extref:
4920         /* Skip if not support EXTENDED_IREF feature */
4921         if (!ext_ref)
4922                 goto out;
4923
4924         btrfs_release_path(&path);
4925         btrfs_init_path(&path);
4926
4927         dir_id = key->offset;
4928         key->type = BTRFS_INODE_EXTREF_KEY;
4929         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4930
4931         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4932         if (ret) {
4933                 ret = INODE_REF_MISSING;
4934                 goto out;
4935         }
4936
4937         node = path.nodes[0];
4938         slot = path.slots[0];
4939
4940         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4941         cur = 0;
4942         total = btrfs_item_size_nr(node, slot);
4943
4944         /* Iterate all entry of INODE_EXTREF */
4945         while (cur < total) {
4946                 ret = INODE_REF_MISSING;
4947
4948                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4949                 ref_index = btrfs_inode_extref_index(node, extref);
4950                 parent = btrfs_inode_extref_parent(node, extref);
4951                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4952                         goto next_extref;
4953
4954                 if (parent != dir_id)
4955                         goto next_extref;
4956
4957                 if (ref_namelen <= BTRFS_NAME_LEN) {
4958                         len = ref_namelen;
4959                 } else {
4960                         len = BTRFS_NAME_LEN;
4961                         warning("root %llu INODE %s[%llu %llu] name too long",
4962                                 root->objectid,
4963                                 key->type == BTRFS_INODE_REF_KEY ?
4964                                         "REF" : "EXTREF",
4965                                 key->objectid, key->offset);
4966                 }
4967                 read_extent_buffer(node, ref_namebuf,
4968                                    (unsigned long)(extref + 1), len);
4969
4970                 if (len != namelen || strncmp(ref_namebuf, name, len))
4971                         goto next_extref;
4972
4973                 *index_ret = ref_index;
4974                 ret = 0;
4975                 goto out;
4976
4977 next_extref:
4978                 len = sizeof(*extref) + ref_namelen;
4979                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4980                 cur += len;
4981
4982         }
4983 out:
4984         btrfs_release_path(&path);
4985         return ret;
4986 }
4987
4988 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4989                                u64 ino, u64 index, const char *namebuf,
4990                                int name_len, u8 filetype, int err)
4991 {
4992         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4993                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4994                       root->objectid, key->objectid, key->offset, namebuf,
4995                       filetype,
4996                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4997         }
4998
4999         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5000                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5001                       root->objectid, key->objectid, index, namebuf, filetype,
5002                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5003         }
5004
5005         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5006                 error(
5007                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5008                       root->objectid, ino, index, namebuf, filetype,
5009                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5010         }
5011
5012         if (err & INODE_REF_MISSING)
5013                 error(
5014                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5015                       root->objectid, ino, key->objectid, namebuf, filetype);
5016
5017 }
5018
5019 /*
5020  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5021  *
5022  * Returns error after repair
5023  */
5024 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5025                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5026                            int err)
5027 {
5028         int ret;
5029
5030         if (err & INODE_ITEM_MISSING) {
5031                 ret = repair_inode_item_missing(root, ino, filetype);
5032                 if (!ret)
5033                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5034         }
5035
5036         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5037                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5038                                             name_len, filetype, err);
5039                 if (!ret) {
5040                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5041                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5042                         err &= ~(INODE_REF_MISSING);
5043                 }
5044         }
5045         return err;
5046 }
5047
5048 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5049                 u64 *size_ret)
5050 {
5051         struct btrfs_key key;
5052         struct btrfs_path path;
5053         u32 len;
5054         struct btrfs_dir_item *di;
5055         int ret;
5056         int cur = 0;
5057         int total = 0;
5058
5059         ASSERT(size_ret);
5060         *size_ret = 0;
5061
5062         key.objectid = ino;
5063         key.type = type;
5064         key.offset = (u64)-1;
5065
5066         btrfs_init_path(&path);
5067         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5068         if (ret < 0) {
5069                 ret = -EIO;
5070                 goto out;
5071         }
5072         /* if found, go to spacial case */
5073         if (ret == 0)
5074                 goto special_case;
5075
5076 loop:
5077         ret = btrfs_previous_item(root, &path, ino, type);
5078
5079         if (ret) {
5080                 ret = 0;
5081                 goto out;
5082         }
5083
5084 special_case:
5085         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5086         cur = 0;
5087         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5088
5089         while (cur < total) {
5090                 len = btrfs_dir_name_len(path.nodes[0], di);
5091                 if (len > BTRFS_NAME_LEN)
5092                         len = BTRFS_NAME_LEN;
5093                 *size_ret += len;
5094
5095                 len += btrfs_dir_data_len(path.nodes[0], di);
5096                 len += sizeof(*di);
5097                 di = (struct btrfs_dir_item *)((char *)di + len);
5098                 cur += len;
5099         }
5100         goto loop;
5101
5102 out:
5103         btrfs_release_path(&path);
5104         return ret;
5105 }
5106
5107 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5108 {
5109         u64 item_size;
5110         u64 index_size;
5111         int ret;
5112
5113         ASSERT(size);
5114         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5115         if (ret)
5116                 goto out;
5117
5118         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5119         if (ret)
5120                 goto out;
5121
5122         *size = item_size + index_size;
5123
5124 out:
5125         if (ret)
5126                 error("failed to count root %llu INODE[%llu] root size",
5127                       root->objectid, ino);
5128         return ret;
5129 }
5130
5131 /*
5132  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5133  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5134  *
5135  * @root:       the root of the fs/file tree
5136  * @key:        the key of the INODE_REF/INODE_EXTREF
5137  * @path:       the path
5138  * @size:       the st_size of the INODE_ITEM
5139  * @ext_ref:    the EXTENDED_IREF feature
5140  *
5141  * Return 0 if no error occurred.
5142  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5143  */
5144 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5145                           struct btrfs_path *path, u64 *size,
5146                           unsigned int ext_ref)
5147 {
5148         struct btrfs_dir_item *di;
5149         struct btrfs_inode_item *ii;
5150         struct btrfs_key key;
5151         struct btrfs_key location;
5152         struct extent_buffer *node;
5153         int slot;
5154         char namebuf[BTRFS_NAME_LEN] = {0};
5155         u32 total;
5156         u32 cur = 0;
5157         u32 len;
5158         u32 name_len;
5159         u32 data_len;
5160         u8 filetype;
5161         u32 mode = 0;
5162         u64 index;
5163         int ret;
5164         int err;
5165         int tmp_err;
5166         int need_research = 0;
5167
5168         /*
5169          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5170          * ignore index check.
5171          */
5172         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5173                 index = di_key->offset;
5174         else
5175                 index = (u64)-1;
5176 begin:
5177         err = 0;
5178         cur = 0;
5179
5180         /* since after repair, path and the dir item may be changed */
5181         if (need_research) {
5182                 need_research = 0;
5183                 err |= DIR_COUNT_AGAIN;
5184                 btrfs_release_path(path);
5185                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5186                 /* the item was deleted, let path point the last checked item */
5187                 if (ret > 0) {
5188                         if (path->slots[0] == 0)
5189                                 btrfs_prev_leaf(root, path);
5190                         else
5191                                 path->slots[0]--;
5192                 }
5193                 if (ret)
5194                         goto out;
5195         }
5196
5197         node = path->nodes[0];
5198         slot = path->slots[0];
5199
5200         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5201         total = btrfs_item_size_nr(node, slot);
5202         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5203
5204         while (cur < total) {
5205                 data_len = btrfs_dir_data_len(node, di);
5206                 tmp_err = 0;
5207                 if (data_len)
5208                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5209                               root->objectid,
5210               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5211                               di_key->objectid, di_key->offset, data_len);
5212
5213                 name_len = btrfs_dir_name_len(node, di);
5214                 if (name_len <= BTRFS_NAME_LEN) {
5215                         len = name_len;
5216                 } else {
5217                         len = BTRFS_NAME_LEN;
5218                         warning("root %llu %s[%llu %llu] name too long",
5219                                 root->objectid,
5220                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5221                                 di_key->objectid, di_key->offset);
5222                 }
5223                 (*size) += name_len;
5224                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5225                                    len);
5226                 filetype = btrfs_dir_type(node, di);
5227
5228                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5229                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5230                         err |= -EIO;
5231                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5232                         root->objectid, di_key->objectid, di_key->offset,
5233                         namebuf, len, filetype, di_key->offset,
5234                         btrfs_name_hash(namebuf, len));
5235                 }
5236
5237                 btrfs_dir_item_key_to_cpu(node, di, &location);
5238                 /* Ignore related ROOT_ITEM check */
5239                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5240                         goto next;
5241
5242                 btrfs_release_path(path);
5243                 /* Check relative INODE_ITEM(existence/filetype) */
5244                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5245                 if (ret) {
5246                         tmp_err |= INODE_ITEM_MISSING;
5247                         goto next;
5248                 }
5249
5250                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5251                                     struct btrfs_inode_item);
5252                 mode = btrfs_inode_mode(path->nodes[0], ii);
5253                 if (imode_to_type(mode) != filetype) {
5254                         tmp_err |= INODE_ITEM_MISMATCH;
5255                         goto next;
5256                 }
5257
5258                 /* Check relative INODE_REF/INODE_EXTREF */
5259                 key.objectid = location.objectid;
5260                 key.type = BTRFS_INODE_REF_KEY;
5261                 key.offset = di_key->objectid;
5262                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5263                                           &index, ext_ref);
5264
5265                 /* check relative INDEX/ITEM */
5266                 key.objectid = di_key->objectid;
5267                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5268                         key.type = BTRFS_DIR_INDEX_KEY;
5269                         key.offset = index;
5270                 } else {
5271                         key.type = BTRFS_DIR_ITEM_KEY;
5272                         key.offset = btrfs_name_hash(namebuf, name_len);
5273                 }
5274
5275                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5276                                          name_len, filetype);
5277                 /* find_dir_item may find index */
5278                 if (key.type == BTRFS_DIR_INDEX_KEY)
5279                         index = key.offset;
5280 next:
5281
5282                 if (tmp_err && repair) {
5283                         ret = repair_dir_item(root, di_key->objectid,
5284                                               location.objectid, index,
5285                                               imode_to_type(mode), namebuf,
5286                                               name_len, tmp_err);
5287                         if (ret != tmp_err) {
5288                                 need_research = 1;
5289                                 goto begin;
5290                         }
5291                 }
5292                 btrfs_release_path(path);
5293                 print_dir_item_err(root, di_key, location.objectid, index,
5294                                    namebuf, name_len, filetype, tmp_err);
5295                 err |= tmp_err;
5296                 len = sizeof(*di) + name_len + data_len;
5297                 di = (struct btrfs_dir_item *)((char *)di + len);
5298                 cur += len;
5299
5300                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5301                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5302                               root->objectid, di_key->objectid,
5303                               di_key->offset);
5304                         break;
5305                 }
5306         }
5307 out:
5308         /* research path */
5309         btrfs_release_path(path);
5310         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5311         if (ret)
5312                 err |= ret > 0 ? -ENOENT : ret;
5313         return err;
5314 }
5315
5316 /*
5317  * Wrapper function of btrfs_punch_hole.
5318  *
5319  * Returns 0 means success.
5320  * Returns not 0 means error.
5321  */
5322 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5323                              u64 len)
5324 {
5325         struct btrfs_trans_handle *trans;
5326         int ret = 0;
5327
5328         trans = btrfs_start_transaction(root, 1);
5329         if (IS_ERR(trans))
5330                 return PTR_ERR(trans);
5331
5332         ret = btrfs_punch_hole(trans, root, ino, start, len);
5333         if (ret)
5334                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5335                       start, len, ino);
5336         else
5337                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5338                        ino);
5339
5340         btrfs_commit_transaction(trans, root);
5341         return ret;
5342 }
5343
5344 /*
5345  * Check file extent datasum/hole, update the size of the file extents,
5346  * check and update the last offset of the file extent.
5347  *
5348  * @root:       the root of fs/file tree.
5349  * @fkey:       the key of the file extent.
5350  * @nodatasum:  INODE_NODATASUM feature.
5351  * @size:       the sum of all EXTENT_DATA items size for this inode.
5352  * @end:        the offset of the last extent.
5353  *
5354  * Return 0 if no error occurred.
5355  */
5356 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5357                              struct extent_buffer *node, int slot,
5358                              unsigned int nodatasum, u64 *size, u64 *end)
5359 {
5360         struct btrfs_file_extent_item *fi;
5361         u64 disk_bytenr;
5362         u64 disk_num_bytes;
5363         u64 extent_num_bytes;
5364         u64 extent_offset;
5365         u64 csum_found;         /* In byte size, sectorsize aligned */
5366         u64 search_start;       /* Logical range start we search for csum */
5367         u64 search_len;         /* Logical range len we search for csum */
5368         unsigned int extent_type;
5369         unsigned int is_hole;
5370         int compressed = 0;
5371         int ret;
5372         int err = 0;
5373
5374         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5375
5376         /* Check inline extent */
5377         extent_type = btrfs_file_extent_type(node, fi);
5378         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5379                 struct btrfs_item *e = btrfs_item_nr(slot);
5380                 u32 item_inline_len;
5381
5382                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5383                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5384                 compressed = btrfs_file_extent_compression(node, fi);
5385                 if (extent_num_bytes == 0) {
5386                         error(
5387                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5388                                 root->objectid, fkey->objectid, fkey->offset);
5389                         err |= FILE_EXTENT_ERROR;
5390                 }
5391                 if (!compressed && extent_num_bytes != item_inline_len) {
5392                         error(
5393                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5394                                 root->objectid, fkey->objectid, fkey->offset,
5395                                 extent_num_bytes, item_inline_len);
5396                         err |= FILE_EXTENT_ERROR;
5397                 }
5398                 *end += extent_num_bytes;
5399                 *size += extent_num_bytes;
5400                 return err;
5401         }
5402
5403         /* Check extent type */
5404         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5405                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5406                 err |= FILE_EXTENT_ERROR;
5407                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5408                       root->objectid, fkey->objectid, fkey->offset);
5409                 return err;
5410         }
5411
5412         /* Check REG_EXTENT/PREALLOC_EXTENT */
5413         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5414         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5415         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5416         extent_offset = btrfs_file_extent_offset(node, fi);
5417         compressed = btrfs_file_extent_compression(node, fi);
5418         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5419
5420         /*
5421          * Check EXTENT_DATA csum
5422          *
5423          * For plain (uncompressed) extent, we should only check the range
5424          * we're referring to, as it's possible that part of prealloc extent
5425          * has been written, and has csum:
5426          *
5427          * |<--- Original large preallocated extent A ---->|
5428          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5429          *      No csum                         Has csum
5430          *
5431          * For compressed extent, we should check the whole range.
5432          */
5433         if (!compressed) {
5434                 search_start = disk_bytenr + extent_offset;
5435                 search_len = extent_num_bytes;
5436         } else {
5437                 search_start = disk_bytenr;
5438                 search_len = disk_num_bytes;
5439         }
5440         ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found);
5441         if (csum_found > 0 && nodatasum) {
5442                 err |= ODD_CSUM_ITEM;
5443                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5444                       root->objectid, fkey->objectid, fkey->offset);
5445         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5446                    !is_hole && (ret < 0 || csum_found < search_len)) {
5447                 err |= CSUM_ITEM_MISSING;
5448                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5449                       root->objectid, fkey->objectid, fkey->offset,
5450                       csum_found, search_len);
5451         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5452                 err |= ODD_CSUM_ITEM;
5453                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5454                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5455         }
5456
5457         /* Check EXTENT_DATA hole */
5458         if (!no_holes && *end != fkey->offset) {
5459                 if (repair)
5460                         ret = punch_extent_hole(root, fkey->objectid,
5461                                                 *end, fkey->offset - *end);
5462                 if (!repair || ret) {
5463                         err |= FILE_EXTENT_ERROR;
5464                         error(
5465 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5466                                 root->objectid, fkey->objectid, fkey->offset,
5467                                 fkey->objectid, *end);
5468                 }
5469         }
5470
5471         *end += extent_num_bytes;
5472         if (!is_hole)
5473                 *size += extent_num_bytes;
5474
5475         return err;
5476 }
5477
5478 /*
5479  * Set inode item nbytes to @nbytes
5480  *
5481  * Returns  0     on success
5482  * Returns  != 0  on error
5483  */
5484 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5485                                       struct btrfs_path *path,
5486                                       u64 ino, u64 nbytes)
5487 {
5488         struct btrfs_trans_handle *trans;
5489         struct btrfs_inode_item *ii;
5490         struct btrfs_key key;
5491         struct btrfs_key research_key;
5492         int err = 0;
5493         int ret;
5494
5495         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5496
5497         key.objectid = ino;
5498         key.type = BTRFS_INODE_ITEM_KEY;
5499         key.offset = 0;
5500
5501         trans = btrfs_start_transaction(root, 1);
5502         if (IS_ERR(trans)) {
5503                 ret = PTR_ERR(trans);
5504                 err |= ret;
5505                 goto out;
5506         }
5507
5508         btrfs_release_path(path);
5509         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5510         if (ret > 0)
5511                 ret = -ENOENT;
5512         if (ret) {
5513                 err |= ret;
5514                 goto fail;
5515         }
5516
5517         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5518                             struct btrfs_inode_item);
5519         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5520         btrfs_mark_buffer_dirty(path->nodes[0]);
5521 fail:
5522         btrfs_commit_transaction(trans, root);
5523 out:
5524         if (ret)
5525                 error("failed to set nbytes in inode %llu root %llu",
5526                       ino, root->root_key.objectid);
5527         else
5528                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5529                        root->root_key.objectid, nbytes);
5530
5531         /* research path */
5532         btrfs_release_path(path);
5533         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5534         err |= ret;
5535
5536         return err;
5537 }
5538
5539 /*
5540  * Set directory inode isize to @isize.
5541  *
5542  * Returns 0     on success.
5543  * Returns != 0  on error.
5544  */
5545 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5546                                    struct btrfs_path *path,
5547                                    u64 ino, u64 isize)
5548 {
5549         struct btrfs_trans_handle *trans;
5550         struct btrfs_inode_item *ii;
5551         struct btrfs_key key;
5552         struct btrfs_key research_key;
5553         int ret;
5554         int err = 0;
5555
5556         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5557
5558         key.objectid = ino;
5559         key.type = BTRFS_INODE_ITEM_KEY;
5560         key.offset = 0;
5561
5562         trans = btrfs_start_transaction(root, 1);
5563         if (IS_ERR(trans)) {
5564                 ret = PTR_ERR(trans);
5565                 err |= ret;
5566                 goto out;
5567         }
5568
5569         btrfs_release_path(path);
5570         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5571         if (ret > 0)
5572                 ret = -ENOENT;
5573         if (ret) {
5574                 err |= ret;
5575                 goto fail;
5576         }
5577
5578         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5579                             struct btrfs_inode_item);
5580         btrfs_set_inode_size(path->nodes[0], ii, isize);
5581         btrfs_mark_buffer_dirty(path->nodes[0]);
5582 fail:
5583         btrfs_commit_transaction(trans, root);
5584 out:
5585         if (ret)
5586                 error("failed to set isize in inode %llu root %llu",
5587                       ino, root->root_key.objectid);
5588         else
5589                 printf("Set isize in inode %llu root %llu to %llu\n",
5590                        ino, root->root_key.objectid, isize);
5591
5592         btrfs_release_path(path);
5593         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5594         err |= ret;
5595
5596         return err;
5597 }
5598
5599 /*
5600  * Wrapper function for btrfs_add_orphan_item().
5601  *
5602  * Returns 0     on success.
5603  * Returns != 0  on error.
5604  */
5605 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5606                                            struct btrfs_path *path, u64 ino)
5607 {
5608         struct btrfs_trans_handle *trans;
5609         struct btrfs_key research_key;
5610         int ret;
5611         int err = 0;
5612
5613         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5614
5615         trans = btrfs_start_transaction(root, 1);
5616         if (IS_ERR(trans)) {
5617                 ret = PTR_ERR(trans);
5618                 err |= ret;
5619                 goto out;
5620         }
5621
5622         btrfs_release_path(path);
5623         ret = btrfs_add_orphan_item(trans, root, path, ino);
5624         err |= ret;
5625         btrfs_commit_transaction(trans, root);
5626 out:
5627         if (ret)
5628                 error("failed to add inode %llu as orphan item root %llu",
5629                       ino, root->root_key.objectid);
5630         else
5631                 printf("Added inode %llu as orphan item root %llu\n",
5632                        ino, root->root_key.objectid);
5633
5634         btrfs_release_path(path);
5635         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5636         err |= ret;
5637
5638         return err;
5639 }
5640
5641 /* Set inode_item nlink to @ref_count.
5642  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5643  *
5644  * Returns 0 on success
5645  */
5646 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5647                                       struct btrfs_path *path, u64 ino,
5648                                       const char *name, u32 namelen,
5649                                       u64 ref_count, u8 filetype, u64 *nlink)
5650 {
5651         struct btrfs_trans_handle *trans;
5652         struct btrfs_inode_item *ii;
5653         struct btrfs_key key;
5654         struct btrfs_key old_key;
5655         char namebuf[BTRFS_NAME_LEN] = {0};
5656         int name_len;
5657         int ret;
5658         int ret2;
5659
5660         /* save the key */
5661         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5662
5663         if (name && namelen) {
5664                 ASSERT(namelen <= BTRFS_NAME_LEN);
5665                 memcpy(namebuf, name, namelen);
5666                 name_len = namelen;
5667         } else {
5668                 sprintf(namebuf, "%llu", ino);
5669                 name_len = count_digits(ino);
5670                 printf("Can't find file name for inode %llu, use %s instead\n",
5671                        ino, namebuf);
5672         }
5673
5674         trans = btrfs_start_transaction(root, 1);
5675         if (IS_ERR(trans)) {
5676                 ret = PTR_ERR(trans);
5677                 goto out;
5678         }
5679
5680         btrfs_release_path(path);
5681         /* if refs is 0, put it into lostfound */
5682         if (ref_count == 0) {
5683                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5684                                               name_len, filetype, &ref_count);
5685                 if (ret)
5686                         goto fail;
5687         }
5688
5689         /* reset inode_item's nlink to ref_count */
5690         key.objectid = ino;
5691         key.type = BTRFS_INODE_ITEM_KEY;
5692         key.offset = 0;
5693
5694         btrfs_release_path(path);
5695         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5696         if (ret > 0)
5697                 ret = -ENOENT;
5698         if (ret)
5699                 goto fail;
5700
5701         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5702                             struct btrfs_inode_item);
5703         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5704         btrfs_mark_buffer_dirty(path->nodes[0]);
5705
5706         if (nlink)
5707                 *nlink = ref_count;
5708 fail:
5709         btrfs_commit_transaction(trans, root);
5710 out:
5711         if (ret)
5712                 error(
5713         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5714                        root->objectid, ino, namebuf, filetype);
5715         else
5716                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5717                        root->objectid, ino, namebuf, filetype);
5718
5719         /* research */
5720         btrfs_release_path(path);
5721         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5722         if (ret2 < 0)
5723                 return ret |= ret2;
5724         return ret;
5725 }
5726
5727 /*
5728  * Check INODE_ITEM and related ITEMs (the same inode number)
5729  * 1. check link count
5730  * 2. check inode ref/extref
5731  * 3. check dir item/index
5732  *
5733  * @ext_ref:    the EXTENDED_IREF feature
5734  *
5735  * Return 0 if no error occurred.
5736  * Return >0 for error or hit the traversal is done(by error bitmap)
5737  */
5738 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5739                             unsigned int ext_ref)
5740 {
5741         struct extent_buffer *node;
5742         struct btrfs_inode_item *ii;
5743         struct btrfs_key key;
5744         struct btrfs_key last_key;
5745         u64 inode_id;
5746         u32 mode;
5747         u64 nlink;
5748         u64 nbytes;
5749         u64 isize;
5750         u64 size = 0;
5751         u64 refs = 0;
5752         u64 extent_end = 0;
5753         u64 extent_size = 0;
5754         unsigned int dir;
5755         unsigned int nodatasum;
5756         int slot;
5757         int ret;
5758         int err = 0;
5759         char namebuf[BTRFS_NAME_LEN] = {0};
5760         u32 name_len = 0;
5761
5762         node = path->nodes[0];
5763         slot = path->slots[0];
5764
5765         btrfs_item_key_to_cpu(node, &key, slot);
5766         inode_id = key.objectid;
5767
5768         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5769                 ret = btrfs_next_item(root, path);
5770                 if (ret > 0)
5771                         err |= LAST_ITEM;
5772                 return err;
5773         }
5774
5775         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5776         isize = btrfs_inode_size(node, ii);
5777         nbytes = btrfs_inode_nbytes(node, ii);
5778         mode = btrfs_inode_mode(node, ii);
5779         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5780         nlink = btrfs_inode_nlink(node, ii);
5781         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5782
5783         while (1) {
5784                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5785                 ret = btrfs_next_item(root, path);
5786                 if (ret < 0) {
5787                         /* out will fill 'err' rusing current statistics */
5788                         goto out;
5789                 } else if (ret > 0) {
5790                         err |= LAST_ITEM;
5791                         goto out;
5792                 }
5793
5794                 node = path->nodes[0];
5795                 slot = path->slots[0];
5796                 btrfs_item_key_to_cpu(node, &key, slot);
5797                 if (key.objectid != inode_id)
5798                         goto out;
5799
5800                 switch (key.type) {
5801                 case BTRFS_INODE_REF_KEY:
5802                         ret = check_inode_ref(root, &key, path, namebuf,
5803                                               &name_len, &refs, mode);
5804                         err |= ret;
5805                         break;
5806                 case BTRFS_INODE_EXTREF_KEY:
5807                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5808                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5809                                         root->objectid, key.objectid,
5810                                         key.offset);
5811                         ret = check_inode_extref(root, &key, node, slot, &refs,
5812                                                  mode);
5813                         err |= ret;
5814                         break;
5815                 case BTRFS_DIR_ITEM_KEY:
5816                 case BTRFS_DIR_INDEX_KEY:
5817                         if (!dir) {
5818                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5819                                         root->objectid, inode_id,
5820                                         imode_to_type(mode), key.objectid,
5821                                         key.offset);
5822                         }
5823                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5824                         err |= ret;
5825                         break;
5826                 case BTRFS_EXTENT_DATA_KEY:
5827                         if (dir) {
5828                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5829                                         root->objectid, inode_id, key.objectid,
5830                                         key.offset);
5831                         }
5832                         ret = check_file_extent(root, &key, node, slot,
5833                                                 nodatasum, &extent_size,
5834                                                 &extent_end);
5835                         err |= ret;
5836                         break;
5837                 case BTRFS_XATTR_ITEM_KEY:
5838                         break;
5839                 default:
5840                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5841                               key.objectid, key.type, key.offset);
5842                 }
5843         }
5844
5845 out:
5846         if (err & LAST_ITEM) {
5847                 btrfs_release_path(path);
5848                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5849                 if (ret)
5850                         return err;
5851         }
5852
5853         /* verify INODE_ITEM nlink/isize/nbytes */
5854         if (dir) {
5855                 if (repair && (err & DIR_COUNT_AGAIN)) {
5856                         err &= ~DIR_COUNT_AGAIN;
5857                         count_dir_isize(root, inode_id, &size);
5858                 }
5859
5860                 if ((nlink != 1 || refs != 1) && repair) {
5861                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5862                                 namebuf, name_len, refs, imode_to_type(mode),
5863                                 &nlink);
5864                 }
5865
5866                 if (nlink != 1) {
5867                         err |= LINK_COUNT_ERROR;
5868                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5869                               root->objectid, inode_id, nlink);
5870                 }
5871
5872                 /*
5873                  * Just a warning, as dir inode nbytes is just an
5874                  * instructive value.
5875                  */
5876                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5877                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5878                                 root->objectid, inode_id,
5879                                 root->fs_info->nodesize);
5880                 }
5881
5882                 if (isize != size) {
5883                         if (repair)
5884                                 ret = repair_dir_isize_lowmem(root, path,
5885                                                               inode_id, size);
5886                         if (!repair || ret) {
5887                                 err |= ISIZE_ERROR;
5888                                 error(
5889                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5890                                       root->objectid, inode_id, isize, size);
5891                         }
5892                 }
5893         } else {
5894                 if (nlink != refs) {
5895                         if (repair)
5896                                 ret = repair_inode_nlinks_lowmem(root, path,
5897                                          inode_id, namebuf, name_len, refs,
5898                                          imode_to_type(mode), &nlink);
5899                         if (!repair || ret) {
5900                                 err |= LINK_COUNT_ERROR;
5901                                 error(
5902                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5903                                       root->objectid, inode_id, nlink, refs);
5904                         }
5905                 } else if (!nlink) {
5906                         if (repair)
5907                                 ret = repair_inode_orphan_item_lowmem(root,
5908                                                               path, inode_id);
5909                         if (!repair || ret) {
5910                                 err |= ORPHAN_ITEM;
5911                                 error("root %llu INODE[%llu] is orphan item",
5912                                       root->objectid, inode_id);
5913                         }
5914                 }
5915
5916                 if (!nbytes && !no_holes && extent_end < isize) {
5917                         if (repair)
5918                                 ret = punch_extent_hole(root, inode_id,
5919                                                 extent_end, isize - extent_end);
5920                         if (!repair || ret) {
5921                                 err |= NBYTES_ERROR;
5922                                 error(
5923         "root %llu INODE[%llu] size %llu should have a file extent hole",
5924                                       root->objectid, inode_id, isize);
5925                         }
5926                 }
5927
5928                 if (nbytes != extent_size) {
5929                         if (repair)
5930                                 ret = repair_inode_nbytes_lowmem(root, path,
5931                                                          inode_id, extent_size);
5932                         if (!repair || ret) {
5933                                 err |= NBYTES_ERROR;
5934                                 error(
5935         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5936                                       root->objectid, inode_id, nbytes,
5937                                       extent_size);
5938                         }
5939                 }
5940         }
5941
5942         if (err & LAST_ITEM)
5943                 btrfs_next_item(root, path);
5944         return err;
5945 }
5946
5947 /*
5948  * Insert the missing inode item and inode ref.
5949  *
5950  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5951  * Root dir should be handled specially because root dir is the root of fs.
5952  *
5953  * returns err (>0 or 0) after repair
5954  */
5955 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5956 {
5957         struct btrfs_trans_handle *trans;
5958         struct btrfs_key key;
5959         struct btrfs_path path;
5960         int filetype = BTRFS_FT_DIR;
5961         int ret = 0;
5962
5963         btrfs_init_path(&path);
5964
5965         if (err & INODE_REF_MISSING) {
5966                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5967                 key.type = BTRFS_INODE_REF_KEY;
5968                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5969
5970                 trans = btrfs_start_transaction(root, 1);
5971                 if (IS_ERR(trans)) {
5972                         ret = PTR_ERR(trans);
5973                         goto out;
5974                 }
5975
5976                 btrfs_release_path(&path);
5977                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5978                 if (ret)
5979                         goto trans_fail;
5980
5981                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5982                                              BTRFS_FIRST_FREE_OBJECTID,
5983                                              BTRFS_FIRST_FREE_OBJECTID, 0);
5984                 if (ret)
5985                         goto trans_fail;
5986
5987                 printf("Add INODE_REF[%llu %llu] name %s\n",
5988                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5989                        "..");
5990                 err &= ~INODE_REF_MISSING;
5991 trans_fail:
5992                 if (ret)
5993                         error("fail to insert first inode's ref");
5994                 btrfs_commit_transaction(trans, root);
5995         }
5996
5997         if (err & INODE_ITEM_MISSING) {
5998                 ret = repair_inode_item_missing(root,
5999                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6000                 if (ret)
6001                         goto out;
6002                 err &= ~INODE_ITEM_MISSING;
6003         }
6004 out:
6005         if (ret)
6006                 error("fail to repair first inode");
6007         btrfs_release_path(&path);
6008         return err;
6009 }
6010
6011 /*
6012  * check first root dir's inode_item and inode_ref
6013  *
6014  * returns 0 means no error
6015  * returns >0 means error
6016  * returns <0 means fatal error
6017  */
6018 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6019 {
6020         struct btrfs_path path;
6021         struct btrfs_key key;
6022         struct btrfs_inode_item *ii;
6023         u64 index;
6024         u32 mode;
6025         int err = 0;
6026         int ret;
6027
6028         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6029         key.type = BTRFS_INODE_ITEM_KEY;
6030         key.offset = 0;
6031
6032         /* For root being dropped, we don't need to check first inode */
6033         if (btrfs_root_refs(&root->root_item) == 0 &&
6034             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6035             BTRFS_FIRST_FREE_OBJECTID)
6036                 return 0;
6037
6038         btrfs_init_path(&path);
6039         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6040         if (ret < 0)
6041                 goto out;
6042         if (ret > 0) {
6043                 ret = 0;
6044                 err |= INODE_ITEM_MISSING;
6045         } else {
6046                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6047                                     struct btrfs_inode_item);
6048                 mode = btrfs_inode_mode(path.nodes[0], ii);
6049                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6050                         err |= INODE_ITEM_MISMATCH;
6051         }
6052
6053         /* lookup first inode ref */
6054         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6055         key.type = BTRFS_INODE_REF_KEY;
6056         /* special index value */
6057         index = 0;
6058
6059         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6060         if (ret < 0)
6061                 goto out;
6062         err |= ret;
6063
6064 out:
6065         btrfs_release_path(&path);
6066
6067         if (err && repair)
6068                 err = repair_fs_first_inode(root, err);
6069
6070         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6071                 error("root dir INODE_ITEM is %s",
6072                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6073         if (err & INODE_REF_MISSING)
6074                 error("root dir INODE_REF is missing");
6075
6076         return ret < 0 ? ret : err;
6077 }
6078
6079 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6080                                                 u64 parent, u64 root)
6081 {
6082         struct rb_node *node;
6083         struct tree_backref *back = NULL;
6084         struct tree_backref match = {
6085                 .node = {
6086                         .is_data = 0,
6087                 },
6088         };
6089
6090         if (parent) {
6091                 match.parent = parent;
6092                 match.node.full_backref = 1;
6093         } else {
6094                 match.root = root;
6095         }
6096
6097         node = rb_search(&rec->backref_tree, &match.node.node,
6098                          (rb_compare_keys)compare_extent_backref, NULL);
6099         if (node)
6100                 back = to_tree_backref(rb_node_to_extent_backref(node));
6101
6102         return back;
6103 }
6104
6105 static struct data_backref *find_data_backref(struct extent_record *rec,
6106                                                 u64 parent, u64 root,
6107                                                 u64 owner, u64 offset,
6108                                                 int found_ref,
6109                                                 u64 disk_bytenr, u64 bytes)
6110 {
6111         struct rb_node *node;
6112         struct data_backref *back = NULL;
6113         struct data_backref match = {
6114                 .node = {
6115                         .is_data = 1,
6116                 },
6117                 .owner = owner,
6118                 .offset = offset,
6119                 .bytes = bytes,
6120                 .found_ref = found_ref,
6121                 .disk_bytenr = disk_bytenr,
6122         };
6123
6124         if (parent) {
6125                 match.parent = parent;
6126                 match.node.full_backref = 1;
6127         } else {
6128                 match.root = root;
6129         }
6130
6131         node = rb_search(&rec->backref_tree, &match.node.node,
6132                          (rb_compare_keys)compare_extent_backref, NULL);
6133         if (node)
6134                 back = to_data_backref(rb_node_to_extent_backref(node));
6135
6136         return back;
6137 }
6138 /*
6139  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6140  * blocks and integrity of fs tree items.
6141  *
6142  * @root:         the root of the tree to be checked.
6143  * @ext_ref       feature EXTENDED_IREF is enable or not.
6144  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6145  *                otherwise means check fs tree(s) items relationship and
6146  *                @root MUST be a fs tree root.
6147  * Returns 0      represents OK.
6148  * Returns not 0  represents error.
6149  */
6150 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6151                             struct btrfs_root *root, unsigned int ext_ref,
6152                             int check_all)
6153
6154 {
6155         struct btrfs_path path;
6156         struct node_refs nrefs;
6157         struct btrfs_root_item *root_item = &root->root_item;
6158         int ret;
6159         int level;
6160         int err = 0;
6161
6162         memset(&nrefs, 0, sizeof(nrefs));
6163         if (!check_all) {
6164                 /*
6165                  * We need to manually check the first inode item (256)
6166                  * As the following traversal function will only start from
6167                  * the first inode item in the leaf, if inode item (256) is
6168                  * missing we will skip it forever.
6169                  */
6170                 ret = check_fs_first_inode(root, ext_ref);
6171                 if (ret < 0)
6172                         return ret;
6173         }
6174
6175
6176         level = btrfs_header_level(root->node);
6177         btrfs_init_path(&path);
6178
6179         if (btrfs_root_refs(root_item) > 0 ||
6180             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6181                 path.nodes[level] = root->node;
6182                 path.slots[level] = 0;
6183                 extent_buffer_get(root->node);
6184         } else {
6185                 struct btrfs_key key;
6186
6187                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6188                 level = root_item->drop_level;
6189                 path.lowest_level = level;
6190                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6191                 if (ret < 0)
6192                         goto out;
6193                 ret = 0;
6194         }
6195
6196         while (1) {
6197                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6198                                         ext_ref, check_all);
6199
6200                 err |= !!ret;
6201
6202                 /* if ret is negative, walk shall stop */
6203                 if (ret < 0) {
6204                         ret = err;
6205                         break;
6206                 }
6207
6208                 ret = walk_up_tree_v2(root, &path, &level);
6209                 if (ret != 0) {
6210                         /* Normal exit, reset ret to err */
6211                         ret = err;
6212                         break;
6213                 }
6214         }
6215
6216 out:
6217         btrfs_release_path(&path);
6218         return ret;
6219 }
6220
6221 /*
6222  * Iterate all items in the tree and call check_inode_item() to check.
6223  *
6224  * @root:       the root of the tree to be checked.
6225  * @ext_ref:    the EXTENDED_IREF feature
6226  *
6227  * Return 0 if no error found.
6228  * Return <0 for error.
6229  */
6230 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6231 {
6232         reset_cached_block_groups(root->fs_info);
6233         return check_btrfs_root(NULL, root, ext_ref, 0);
6234 }
6235
6236 /*
6237  * Find the relative ref for root_ref and root_backref.
6238  *
6239  * @root:       the root of the root tree.
6240  * @ref_key:    the key of the root ref.
6241  *
6242  * Return 0 if no error occurred.
6243  */
6244 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6245                           struct extent_buffer *node, int slot)
6246 {
6247         struct btrfs_path path;
6248         struct btrfs_key key;
6249         struct btrfs_root_ref *ref;
6250         struct btrfs_root_ref *backref;
6251         char ref_name[BTRFS_NAME_LEN] = {0};
6252         char backref_name[BTRFS_NAME_LEN] = {0};
6253         u64 ref_dirid;
6254         u64 ref_seq;
6255         u32 ref_namelen;
6256         u64 backref_dirid;
6257         u64 backref_seq;
6258         u32 backref_namelen;
6259         u32 len;
6260         int ret;
6261         int err = 0;
6262
6263         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6264         ref_dirid = btrfs_root_ref_dirid(node, ref);
6265         ref_seq = btrfs_root_ref_sequence(node, ref);
6266         ref_namelen = btrfs_root_ref_name_len(node, ref);
6267
6268         if (ref_namelen <= BTRFS_NAME_LEN) {
6269                 len = ref_namelen;
6270         } else {
6271                 len = BTRFS_NAME_LEN;
6272                 warning("%s[%llu %llu] ref_name too long",
6273                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6274                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6275                         ref_key->offset);
6276         }
6277         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6278
6279         /* Find relative root_ref */
6280         key.objectid = ref_key->offset;
6281         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6282         key.offset = ref_key->objectid;
6283
6284         btrfs_init_path(&path);
6285         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6286         if (ret) {
6287                 err |= ROOT_REF_MISSING;
6288                 error("%s[%llu %llu] couldn't find relative ref",
6289                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6290                       "ROOT_REF" : "ROOT_BACKREF",
6291                       ref_key->objectid, ref_key->offset);
6292                 goto out;
6293         }
6294
6295         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6296                                  struct btrfs_root_ref);
6297         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6298         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6299         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6300
6301         if (backref_namelen <= BTRFS_NAME_LEN) {
6302                 len = backref_namelen;
6303         } else {
6304                 len = BTRFS_NAME_LEN;
6305                 warning("%s[%llu %llu] ref_name too long",
6306                         key.type == BTRFS_ROOT_REF_KEY ?
6307                         "ROOT_REF" : "ROOT_BACKREF",
6308                         key.objectid, key.offset);
6309         }
6310         read_extent_buffer(path.nodes[0], backref_name,
6311                            (unsigned long)(backref + 1), len);
6312
6313         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6314             ref_namelen != backref_namelen ||
6315             strncmp(ref_name, backref_name, len)) {
6316                 err |= ROOT_REF_MISMATCH;
6317                 error("%s[%llu %llu] mismatch relative ref",
6318                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6319                       "ROOT_REF" : "ROOT_BACKREF",
6320                       ref_key->objectid, ref_key->offset);
6321         }
6322 out:
6323         btrfs_release_path(&path);
6324         return err;
6325 }
6326
6327 /*
6328  * Check all fs/file tree in low_memory mode.
6329  *
6330  * 1. for fs tree root item, call check_fs_root_v2()
6331  * 2. for fs tree root ref/backref, call check_root_ref()
6332  *
6333  * Return 0 if no error occurred.
6334  */
6335 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6336 {
6337         struct btrfs_root *tree_root = fs_info->tree_root;
6338         struct btrfs_root *cur_root = NULL;
6339         struct btrfs_path path;
6340         struct btrfs_key key;
6341         struct extent_buffer *node;
6342         unsigned int ext_ref;
6343         int slot;
6344         int ret;
6345         int err = 0;
6346
6347         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6348
6349         btrfs_init_path(&path);
6350         key.objectid = BTRFS_FS_TREE_OBJECTID;
6351         key.offset = 0;
6352         key.type = BTRFS_ROOT_ITEM_KEY;
6353
6354         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6355         if (ret < 0) {
6356                 err = ret;
6357                 goto out;
6358         } else if (ret > 0) {
6359                 err = -ENOENT;
6360                 goto out;
6361         }
6362
6363         while (1) {
6364                 node = path.nodes[0];
6365                 slot = path.slots[0];
6366                 btrfs_item_key_to_cpu(node, &key, slot);
6367                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6368                         goto out;
6369                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6370                     fs_root_objectid(key.objectid)) {
6371                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6372                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6373                                                                        &key);
6374                         } else {
6375                                 key.offset = (u64)-1;
6376                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6377                         }
6378
6379                         if (IS_ERR(cur_root)) {
6380                                 error("Fail to read fs/subvol tree: %lld",
6381                                       key.objectid);
6382                                 err = -EIO;
6383                                 goto next;
6384                         }
6385
6386                         ret = check_fs_root_v2(cur_root, ext_ref);
6387                         err |= ret;
6388
6389                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6390                                 btrfs_free_fs_root(cur_root);
6391                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6392                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6393                         ret = check_root_ref(tree_root, &key, node, slot);
6394                         err |= ret;
6395                 }
6396 next:
6397                 ret = btrfs_next_item(tree_root, &path);
6398                 if (ret > 0)
6399                         goto out;
6400                 if (ret < 0) {
6401                         err = ret;
6402                         goto out;
6403                 }
6404         }
6405
6406 out:
6407         btrfs_release_path(&path);
6408         return err;
6409 }
6410
6411 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6412                           struct cache_tree *root_cache)
6413 {
6414         int ret;
6415
6416         if (!ctx.progress_enabled)
6417                 fprintf(stderr, "checking fs roots\n");
6418         if (check_mode == CHECK_MODE_LOWMEM)
6419                 ret = check_fs_roots_v2(fs_info);
6420         else
6421                 ret = check_fs_roots(fs_info, root_cache);
6422
6423         return ret;
6424 }
6425
6426 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6427 {
6428         struct extent_backref *back, *tmp;
6429         struct tree_backref *tback;
6430         struct data_backref *dback;
6431         u64 found = 0;
6432         int err = 0;
6433
6434         rbtree_postorder_for_each_entry_safe(back, tmp,
6435                                              &rec->backref_tree, node) {
6436                 if (!back->found_extent_tree) {
6437                         err = 1;
6438                         if (!print_errs)
6439                                 goto out;
6440                         if (back->is_data) {
6441                                 dback = to_data_backref(back);
6442                                 fprintf(stderr, "Data backref %llu %s %llu"
6443                                         " owner %llu offset %llu num_refs %lu"
6444                                         " not found in extent tree\n",
6445                                         (unsigned long long)rec->start,
6446                                         back->full_backref ?
6447                                         "parent" : "root",
6448                                         back->full_backref ?
6449                                         (unsigned long long)dback->parent:
6450                                         (unsigned long long)dback->root,
6451                                         (unsigned long long)dback->owner,
6452                                         (unsigned long long)dback->offset,
6453                                         (unsigned long)dback->num_refs);
6454                         } else {
6455                                 tback = to_tree_backref(back);
6456                                 fprintf(stderr, "Tree backref %llu parent %llu"
6457                                         " root %llu not found in extent tree\n",
6458                                         (unsigned long long)rec->start,
6459                                         (unsigned long long)tback->parent,
6460                                         (unsigned long long)tback->root);
6461                         }
6462                 }
6463                 if (!back->is_data && !back->found_ref) {
6464                         err = 1;
6465                         if (!print_errs)
6466                                 goto out;
6467                         tback = to_tree_backref(back);
6468                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6469                                 (unsigned long long)rec->start,
6470                                 back->full_backref ? "parent" : "root",
6471                                 back->full_backref ?
6472                                 (unsigned long long)tback->parent :
6473                                 (unsigned long long)tback->root, back);
6474                 }
6475                 if (back->is_data) {
6476                         dback = to_data_backref(back);
6477                         if (dback->found_ref != dback->num_refs) {
6478                                 err = 1;
6479                                 if (!print_errs)
6480                                         goto out;
6481                                 fprintf(stderr, "Incorrect local backref count"
6482                                         " on %llu %s %llu owner %llu"
6483                                         " offset %llu found %u wanted %u back %p\n",
6484                                         (unsigned long long)rec->start,
6485                                         back->full_backref ?
6486                                         "parent" : "root",
6487                                         back->full_backref ?
6488                                         (unsigned long long)dback->parent:
6489                                         (unsigned long long)dback->root,
6490                                         (unsigned long long)dback->owner,
6491                                         (unsigned long long)dback->offset,
6492                                         dback->found_ref, dback->num_refs, back);
6493                         }
6494                         if (dback->disk_bytenr != rec->start) {
6495                                 err = 1;
6496                                 if (!print_errs)
6497                                         goto out;
6498                                 fprintf(stderr, "Backref disk bytenr does not"
6499                                         " match extent record, bytenr=%llu, "
6500                                         "ref bytenr=%llu\n",
6501                                         (unsigned long long)rec->start,
6502                                         (unsigned long long)dback->disk_bytenr);
6503                         }
6504
6505                         if (dback->bytes != rec->nr) {
6506                                 err = 1;
6507                                 if (!print_errs)
6508                                         goto out;
6509                                 fprintf(stderr, "Backref bytes do not match "
6510                                         "extent backref, bytenr=%llu, ref "
6511                                         "bytes=%llu, backref bytes=%llu\n",
6512                                         (unsigned long long)rec->start,
6513                                         (unsigned long long)rec->nr,
6514                                         (unsigned long long)dback->bytes);
6515                         }
6516                 }
6517                 if (!back->is_data) {
6518                         found += 1;
6519                 } else {
6520                         dback = to_data_backref(back);
6521                         found += dback->found_ref;
6522                 }
6523         }
6524         if (found != rec->refs) {
6525                 err = 1;
6526                 if (!print_errs)
6527                         goto out;
6528                 fprintf(stderr, "Incorrect global backref count "
6529                         "on %llu found %llu wanted %llu\n",
6530                         (unsigned long long)rec->start,
6531                         (unsigned long long)found,
6532                         (unsigned long long)rec->refs);
6533         }
6534 out:
6535         return err;
6536 }
6537
/* rb_free_nodes() callback: free the extent_backref embedding @node. */
static void __free_one_backref(struct rb_node *node)
{
	free(rb_node_to_extent_backref(node));
}
6544
6545 static void free_all_extent_backrefs(struct extent_record *rec)
6546 {
6547         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6548 }
6549
6550 static void free_extent_record_cache(struct cache_tree *extent_cache)
6551 {
6552         struct cache_extent *cache;
6553         struct extent_record *rec;
6554
6555         while (1) {
6556                 cache = first_cache_extent(extent_cache);
6557                 if (!cache)
6558                         break;
6559                 rec = container_of(cache, struct extent_record, cache);
6560                 remove_cache_extent(extent_cache, cache);
6561                 free_all_extent_backrefs(rec);
6562                 free(rec);
6563         }
6564 }
6565
6566 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6567                                  struct extent_record *rec)
6568 {
6569         if (rec->content_checked && rec->owner_ref_checked &&
6570             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6571             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6572             !rec->bad_full_backref && !rec->crossing_stripes &&
6573             !rec->wrong_chunk_type) {
6574                 remove_cache_extent(extent_cache, &rec->cache);
6575                 free_all_extent_backrefs(rec);
6576                 list_del_init(&rec->list);
6577                 free(rec);
6578         }
6579         return 0;
6580 }
6581
6582 static int check_owner_ref(struct btrfs_root *root,
6583                             struct extent_record *rec,
6584                             struct extent_buffer *buf)
6585 {
6586         struct extent_backref *node, *tmp;
6587         struct tree_backref *back;
6588         struct btrfs_root *ref_root;
6589         struct btrfs_key key;
6590         struct btrfs_path path;
6591         struct extent_buffer *parent;
6592         int level;
6593         int found = 0;
6594         int ret;
6595
6596         rbtree_postorder_for_each_entry_safe(node, tmp,
6597                                              &rec->backref_tree, node) {
6598                 if (node->is_data)
6599                         continue;
6600                 if (!node->found_ref)
6601                         continue;
6602                 if (node->full_backref)
6603                         continue;
6604                 back = to_tree_backref(node);
6605                 if (btrfs_header_owner(buf) == back->root)
6606                         return 0;
6607         }
6608         BUG_ON(rec->is_root);
6609
6610         /* try to find the block by search corresponding fs tree */
6611         key.objectid = btrfs_header_owner(buf);
6612         key.type = BTRFS_ROOT_ITEM_KEY;
6613         key.offset = (u64)-1;
6614
6615         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6616         if (IS_ERR(ref_root))
6617                 return 1;
6618
6619         level = btrfs_header_level(buf);
6620         if (level == 0)
6621                 btrfs_item_key_to_cpu(buf, &key, 0);
6622         else
6623                 btrfs_node_key_to_cpu(buf, &key, 0);
6624
6625         btrfs_init_path(&path);
6626         path.lowest_level = level + 1;
6627         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6628         if (ret < 0)
6629                 return 0;
6630
6631         parent = path.nodes[level + 1];
6632         if (parent && buf->start == btrfs_node_blockptr(parent,
6633                                                         path.slots[level + 1]))
6634                 found = 1;
6635
6636         btrfs_release_path(&path);
6637         return found ? 0 : 1;
6638 }
6639
6640 static int is_extent_tree_record(struct extent_record *rec)
6641 {
6642         struct extent_backref *node, *tmp;
6643         struct tree_backref *back;
6644         int is_extent = 0;
6645
6646         rbtree_postorder_for_each_entry_safe(node, tmp,
6647                                              &rec->backref_tree, node) {
6648                 if (node->is_data)
6649                         return 0;
6650                 back = to_tree_backref(node);
6651                 if (node->full_backref)
6652                         return 0;
6653                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6654                         is_extent = 1;
6655         }
6656         return is_extent;
6657 }
6658
6659
6660 static int record_bad_block_io(struct btrfs_fs_info *info,
6661                                struct cache_tree *extent_cache,
6662                                u64 start, u64 len)
6663 {
6664         struct extent_record *rec;
6665         struct cache_extent *cache;
6666         struct btrfs_key key;
6667
6668         cache = lookup_cache_extent(extent_cache, start, len);
6669         if (!cache)
6670                 return 0;
6671
6672         rec = container_of(cache, struct extent_record, cache);
6673         if (!is_extent_tree_record(rec))
6674                 return 0;
6675
6676         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6677         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6678 }
6679
6680 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6681                        struct extent_buffer *buf, int slot)
6682 {
6683         if (btrfs_header_level(buf)) {
6684                 struct btrfs_key_ptr ptr1, ptr2;
6685
6686                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6687                                    sizeof(struct btrfs_key_ptr));
6688                 read_extent_buffer(buf, &ptr2,
6689                                    btrfs_node_key_ptr_offset(slot + 1),
6690                                    sizeof(struct btrfs_key_ptr));
6691                 write_extent_buffer(buf, &ptr1,
6692                                     btrfs_node_key_ptr_offset(slot + 1),
6693                                     sizeof(struct btrfs_key_ptr));
6694                 write_extent_buffer(buf, &ptr2,
6695                                     btrfs_node_key_ptr_offset(slot),
6696                                     sizeof(struct btrfs_key_ptr));
6697                 if (slot == 0) {
6698                         struct btrfs_disk_key key;
6699                         btrfs_node_key(buf, &key, 0);
6700                         btrfs_fixup_low_keys(root, path, &key,
6701                                              btrfs_header_level(buf) + 1);
6702                 }
6703         } else {
6704                 struct btrfs_item *item1, *item2;
6705                 struct btrfs_key k1, k2;
6706                 char *item1_data, *item2_data;
6707                 u32 item1_offset, item2_offset, item1_size, item2_size;
6708
6709                 item1 = btrfs_item_nr(slot);
6710                 item2 = btrfs_item_nr(slot + 1);
6711                 btrfs_item_key_to_cpu(buf, &k1, slot);
6712                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6713                 item1_offset = btrfs_item_offset(buf, item1);
6714                 item2_offset = btrfs_item_offset(buf, item2);
6715                 item1_size = btrfs_item_size(buf, item1);
6716                 item2_size = btrfs_item_size(buf, item2);
6717
6718                 item1_data = malloc(item1_size);
6719                 if (!item1_data)
6720                         return -ENOMEM;
6721                 item2_data = malloc(item2_size);
6722                 if (!item2_data) {
6723                         free(item1_data);
6724                         return -ENOMEM;
6725                 }
6726
6727                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6728                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6729
6730                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6731                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6732                 free(item1_data);
6733                 free(item2_data);
6734
6735                 btrfs_set_item_offset(buf, item1, item2_offset);
6736                 btrfs_set_item_offset(buf, item2, item1_offset);
6737                 btrfs_set_item_size(buf, item1, item2_size);
6738                 btrfs_set_item_size(buf, item2, item1_size);
6739
6740                 path->slots[0] = slot;
6741                 btrfs_set_item_key_unsafe(root, path, &k2);
6742                 path->slots[0] = slot + 1;
6743                 btrfs_set_item_key_unsafe(root, path, &k1);
6744         }
6745         return 0;
6746 }
6747
6748 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6749 {
6750         struct extent_buffer *buf;
6751         struct btrfs_key k1, k2;
6752         int i;
6753         int level = path->lowest_level;
6754         int ret = -EIO;
6755
6756         buf = path->nodes[level];
6757         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6758                 if (level) {
6759                         btrfs_node_key_to_cpu(buf, &k1, i);
6760                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6761                 } else {
6762                         btrfs_item_key_to_cpu(buf, &k1, i);
6763                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6764                 }
6765                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6766                         continue;
6767                 ret = swap_values(root, path, buf, i);
6768                 if (ret)
6769                         break;
6770                 btrfs_mark_buffer_dirty(buf);
6771                 i = 0;
6772         }
6773         return ret;
6774 }
6775
6776 static int delete_bogus_item(struct btrfs_root *root,
6777                              struct btrfs_path *path,
6778                              struct extent_buffer *buf, int slot)
6779 {
6780         struct btrfs_key key;
6781         int nritems = btrfs_header_nritems(buf);
6782
6783         btrfs_item_key_to_cpu(buf, &key, slot);
6784
6785         /* These are all the keys we can deal with missing. */
6786         if (key.type != BTRFS_DIR_INDEX_KEY &&
6787             key.type != BTRFS_EXTENT_ITEM_KEY &&
6788             key.type != BTRFS_METADATA_ITEM_KEY &&
6789             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6790             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6791                 return -1;
6792
6793         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6794                (unsigned long long)key.objectid, key.type,
6795                (unsigned long long)key.offset, slot, buf->start);
6796         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6797                               btrfs_item_nr_offset(slot + 1),
6798                               sizeof(struct btrfs_item) *
6799                               (nritems - slot - 1));
6800         btrfs_set_header_nritems(buf, nritems - 1);
6801         if (slot == 0) {
6802                 struct btrfs_disk_key disk_key;
6803
6804                 btrfs_item_key(buf, &disk_key, 0);
6805                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6806         }
6807         btrfs_mark_buffer_dirty(buf);
6808         return 0;
6809 }
6810
6811 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6812 {
6813         struct extent_buffer *buf;
6814         int i;
6815         int ret = 0;
6816
6817         /* We should only get this for leaves */
6818         BUG_ON(path->lowest_level);
6819         buf = path->nodes[0];
6820 again:
6821         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6822                 unsigned int shift = 0, offset;
6823
6824                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6825                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6826                         if (btrfs_item_end_nr(buf, i) >
6827                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6828                                 ret = delete_bogus_item(root, path, buf, i);
6829                                 if (!ret)
6830                                         goto again;
6831                                 fprintf(stderr, "item is off the end of the "
6832                                         "leaf, can't fix\n");
6833                                 ret = -EIO;
6834                                 break;
6835                         }
6836                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6837                                 btrfs_item_end_nr(buf, i);
6838                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6839                            btrfs_item_offset_nr(buf, i - 1)) {
6840                         if (btrfs_item_end_nr(buf, i) >
6841                             btrfs_item_offset_nr(buf, i - 1)) {
6842                                 ret = delete_bogus_item(root, path, buf, i);
6843                                 if (!ret)
6844                                         goto again;
6845                                 fprintf(stderr, "items overlap, can't fix\n");
6846                                 ret = -EIO;
6847                                 break;
6848                         }
6849                         shift = btrfs_item_offset_nr(buf, i - 1) -
6850                                 btrfs_item_end_nr(buf, i);
6851                 }
6852                 if (!shift)
6853                         continue;
6854
6855                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6856                        i, shift, (unsigned long long)buf->start);
6857                 offset = btrfs_item_offset_nr(buf, i);
6858                 memmove_extent_buffer(buf,
6859                                       btrfs_leaf_data(buf) + offset + shift,
6860                                       btrfs_leaf_data(buf) + offset,
6861                                       btrfs_item_size_nr(buf, i));
6862                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6863                                       offset + shift);
6864                 btrfs_mark_buffer_dirty(buf);
6865         }
6866
6867         /*
6868          * We may have moved things, in which case we want to exit so we don't
6869          * write those changes out.  Once we have proper abort functionality in
6870          * progs this can be changed to something nicer.
6871          */
6872         BUG_ON(ret);
6873         return ret;
6874 }
6875
6876 /*
6877  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6878  * then just return -EIO.
6879  */
6880 static int try_to_fix_bad_block(struct btrfs_root *root,
6881                                 struct extent_buffer *buf,
6882                                 enum btrfs_tree_block_status status)
6883 {
6884         struct btrfs_trans_handle *trans;
6885         struct ulist *roots;
6886         struct ulist_node *node;
6887         struct btrfs_root *search_root;
6888         struct btrfs_path path;
6889         struct ulist_iterator iter;
6890         struct btrfs_key root_key, key;
6891         int ret;
6892
6893         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6894             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6895                 return -EIO;
6896
6897         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6898         if (ret)
6899                 return -EIO;
6900
6901         btrfs_init_path(&path);
6902         ULIST_ITER_INIT(&iter);
6903         while ((node = ulist_next(roots, &iter))) {
6904                 root_key.objectid = node->val;
6905                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6906                 root_key.offset = (u64)-1;
6907
6908                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6909                 if (IS_ERR(root)) {
6910                         ret = -EIO;
6911                         break;
6912                 }
6913
6914
6915                 trans = btrfs_start_transaction(search_root, 0);
6916                 if (IS_ERR(trans)) {
6917                         ret = PTR_ERR(trans);
6918                         break;
6919                 }
6920
6921                 path.lowest_level = btrfs_header_level(buf);
6922                 path.skip_check_block = 1;
6923                 if (path.lowest_level)
6924                         btrfs_node_key_to_cpu(buf, &key, 0);
6925                 else
6926                         btrfs_item_key_to_cpu(buf, &key, 0);
6927                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6928                 if (ret) {
6929                         ret = -EIO;
6930                         btrfs_commit_transaction(trans, search_root);
6931                         break;
6932                 }
6933                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6934                         ret = fix_key_order(search_root, &path);
6935                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6936                         ret = fix_item_offset(search_root, &path);
6937                 if (ret) {
6938                         btrfs_commit_transaction(trans, search_root);
6939                         break;
6940                 }
6941                 btrfs_release_path(&path);
6942                 btrfs_commit_transaction(trans, search_root);
6943         }
6944         ulist_free(roots);
6945         btrfs_release_path(&path);
6946         return ret;
6947 }
6948
6949 static int check_block(struct btrfs_root *root,
6950                        struct cache_tree *extent_cache,
6951                        struct extent_buffer *buf, u64 flags)
6952 {
6953         struct extent_record *rec;
6954         struct cache_extent *cache;
6955         struct btrfs_key key;
6956         enum btrfs_tree_block_status status;
6957         int ret = 0;
6958         int level;
6959
6960         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6961         if (!cache)
6962                 return 1;
6963         rec = container_of(cache, struct extent_record, cache);
6964         rec->generation = btrfs_header_generation(buf);
6965
6966         level = btrfs_header_level(buf);
6967         if (btrfs_header_nritems(buf) > 0) {
6968
6969                 if (level == 0)
6970                         btrfs_item_key_to_cpu(buf, &key, 0);
6971                 else
6972                         btrfs_node_key_to_cpu(buf, &key, 0);
6973
6974                 rec->info_objectid = key.objectid;
6975         }
6976         rec->info_level = level;
6977
6978         if (btrfs_is_leaf(buf))
6979                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6980         else
6981                 status = btrfs_check_node(root, &rec->parent_key, buf);
6982
6983         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6984                 if (repair)
6985                         status = try_to_fix_bad_block(root, buf, status);
6986                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6987                         ret = -EIO;
6988                         fprintf(stderr, "bad block %llu\n",
6989                                 (unsigned long long)buf->start);
6990                 } else {
6991                         /*
6992                          * Signal to callers we need to start the scan over
6993                          * again since we'll have cowed blocks.
6994                          */
6995                         ret = -EAGAIN;
6996                 }
6997         } else {
6998                 rec->content_checked = 1;
6999                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7000                         rec->owner_ref_checked = 1;
7001                 else {
7002                         ret = check_owner_ref(root, rec, buf);
7003                         if (!ret)
7004                                 rec->owner_ref_checked = 1;
7005                 }
7006         }
7007         if (!ret)
7008                 maybe_free_extent_rec(extent_cache, rec);
7009         return ret;
7010 }
7011
#if 0
/*
 * Compiled out: linear search of rec->backrefs for a tree backref.
 *
 * With @parent > 0 only full backrefs are considered and matched on the
 * parent bytenr; otherwise only non-full backrefs are considered and matched
 * on @root.  Returns the matching backref or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *head = &rec->backrefs;
	struct list_head *pos;

	for (pos = head->next; pos != head; pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;

		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
7041
7042 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7043                                                 u64 parent, u64 root)
7044 {
7045         struct tree_backref *ref = malloc(sizeof(*ref));
7046
7047         if (!ref)
7048                 return NULL;
7049         memset(&ref->node, 0, sizeof(ref->node));
7050         if (parent > 0) {
7051                 ref->parent = parent;
7052                 ref->node.full_backref = 1;
7053         } else {
7054                 ref->root = root;
7055                 ref->node.full_backref = 0;
7056         }
7057
7058         return ref;
7059 }
7060
#if 0
/*
 * Compiled out: linear search of rec->backrefs for a data backref.
 *
 * With @parent > 0 only full backrefs are considered and matched on the
 * parent bytenr.  Otherwise only non-full backrefs with the same
 * root/owner/offset are considered; when both the caller (@found_ref) and the
 * candidate already have a found reference, the candidate is skipped unless
 * its bytes and disk_bytenr equal @bytes and @disk_bytenr.
 * Returns the matching backref or NULL.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *head = &rec->backrefs;
	struct list_head *pos;

	for (pos = head->next; pos != head; pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;

		back = to_data_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
7099
7100 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7101                                                 u64 parent, u64 root,
7102                                                 u64 owner, u64 offset,
7103                                                 u64 max_size)
7104 {
7105         struct data_backref *ref = malloc(sizeof(*ref));
7106
7107         if (!ref)
7108                 return NULL;
7109         memset(&ref->node, 0, sizeof(ref->node));
7110         ref->node.is_data = 1;
7111
7112         if (parent > 0) {
7113                 ref->parent = parent;
7114                 ref->owner = 0;
7115                 ref->offset = 0;
7116                 ref->node.full_backref = 1;
7117         } else {
7118                 ref->root = root;
7119                 ref->owner = owner;
7120                 ref->offset = offset;
7121                 ref->node.full_backref = 0;
7122         }
7123         ref->bytes = max_size;
7124         ref->found_ref = 0;
7125         ref->num_refs = 0;
7126         if (max_size > rec->max_size)
7127                 rec->max_size = max_size;
7128         return ref;
7129 }
7130
7131 /* Check if the type of extent matches with its chunk */
7132 static void check_extent_type(struct extent_record *rec)
7133 {
7134         struct btrfs_block_group_cache *bg_cache;
7135
7136         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7137         if (!bg_cache)
7138                 return;
7139
7140         /* data extent, check chunk directly*/
7141         if (!rec->metadata) {
7142                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7143                         rec->wrong_chunk_type = 1;
7144                 return;
7145         }
7146
7147         /* metadata extent, check the obvious case first */
7148         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7149                                  BTRFS_BLOCK_GROUP_METADATA))) {
7150                 rec->wrong_chunk_type = 1;
7151                 return;
7152         }
7153
7154         /*
7155          * Check SYSTEM extent, as it's also marked as metadata, we can only
7156          * make sure it's a SYSTEM extent by its backref
7157          */
7158         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7159                 struct extent_backref *node;
7160                 struct tree_backref *tback;
7161                 u64 bg_type;
7162
7163                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7164                 if (node->is_data) {
7165                         /* tree block shouldn't have data backref */
7166                         rec->wrong_chunk_type = 1;
7167                         return;
7168                 }
7169                 tback = container_of(node, struct tree_backref, node);
7170
7171                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7172                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7173                 else
7174                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7175                 if (!(bg_cache->flags & bg_type))
7176                         rec->wrong_chunk_type = 1;
7177         }
7178 }
7179
7180 /*
7181  * Allocate a new extent record, fill default values from @tmpl and insert int
7182  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7183  * the cache, otherwise it fails.
7184  */
7185 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7186                 struct extent_record *tmpl)
7187 {
7188         struct extent_record *rec;
7189         int ret = 0;
7190
7191         BUG_ON(tmpl->max_size == 0);
7192         rec = malloc(sizeof(*rec));
7193         if (!rec)
7194                 return -ENOMEM;
7195         rec->start = tmpl->start;
7196         rec->max_size = tmpl->max_size;
7197         rec->nr = max(tmpl->nr, tmpl->max_size);
7198         rec->found_rec = tmpl->found_rec;
7199         rec->content_checked = tmpl->content_checked;
7200         rec->owner_ref_checked = tmpl->owner_ref_checked;
7201         rec->num_duplicates = 0;
7202         rec->metadata = tmpl->metadata;
7203         rec->flag_block_full_backref = FLAG_UNSET;
7204         rec->bad_full_backref = 0;
7205         rec->crossing_stripes = 0;
7206         rec->wrong_chunk_type = 0;
7207         rec->is_root = tmpl->is_root;
7208         rec->refs = tmpl->refs;
7209         rec->extent_item_refs = tmpl->extent_item_refs;
7210         rec->parent_generation = tmpl->parent_generation;
7211         INIT_LIST_HEAD(&rec->backrefs);
7212         INIT_LIST_HEAD(&rec->dups);
7213         INIT_LIST_HEAD(&rec->list);
7214         rec->backref_tree = RB_ROOT;
7215         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7216         rec->cache.start = tmpl->start;
7217         rec->cache.size = tmpl->nr;
7218         ret = insert_cache_extent(extent_cache, &rec->cache);
7219         if (ret) {
7220                 free(rec);
7221                 return ret;
7222         }
7223         bytes_used += rec->nr;
7224
7225         if (tmpl->metadata)
7226                 rec->crossing_stripes = check_crossing_stripes(global_info,
7227                                 rec->start, global_info->nodesize);
7228         check_extent_type(rec);
7229         return ret;
7230 }
7231
7232 /*
7233  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7234  * some are hints:
7235  * - refs              - if found, increase refs
7236  * - is_root           - if found, set
7237  * - content_checked   - if found, set
7238  * - owner_ref_checked - if found, set
7239  *
7240  * If not found, create a new one, initialize and insert.
7241  */
7242 static int add_extent_rec(struct cache_tree *extent_cache,
7243                 struct extent_record *tmpl)
7244 {
7245         struct extent_record *rec;
7246         struct cache_extent *cache;
7247         int ret = 0;
7248         int dup = 0;
7249
7250         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7251         if (cache) {
7252                 rec = container_of(cache, struct extent_record, cache);
7253                 if (tmpl->refs)
7254                         rec->refs++;
7255                 if (rec->nr == 1)
7256                         rec->nr = max(tmpl->nr, tmpl->max_size);
7257
7258                 /*
7259                  * We need to make sure to reset nr to whatever the extent
7260                  * record says was the real size, this way we can compare it to
7261                  * the backrefs.
7262                  */
7263                 if (tmpl->found_rec) {
7264                         if (tmpl->start != rec->start || rec->found_rec) {
7265                                 struct extent_record *tmp;
7266
7267                                 dup = 1;
7268                                 if (list_empty(&rec->list))
7269                                         list_add_tail(&rec->list,
7270                                                       &duplicate_extents);
7271
7272                                 /*
7273                                  * We have to do this song and dance in case we
7274                                  * find an extent record that falls inside of
7275                                  * our current extent record but does not have
7276                                  * the same objectid.
7277                                  */
7278                                 tmp = malloc(sizeof(*tmp));
7279                                 if (!tmp)
7280                                         return -ENOMEM;
7281                                 tmp->start = tmpl->start;
7282                                 tmp->max_size = tmpl->max_size;
7283                                 tmp->nr = tmpl->nr;
7284                                 tmp->found_rec = 1;
7285                                 tmp->metadata = tmpl->metadata;
7286                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7287                                 INIT_LIST_HEAD(&tmp->list);
7288                                 list_add_tail(&tmp->list, &rec->dups);
7289                                 rec->num_duplicates++;
7290                         } else {
7291                                 rec->nr = tmpl->nr;
7292                                 rec->found_rec = 1;
7293                         }
7294                 }
7295
7296                 if (tmpl->extent_item_refs && !dup) {
7297                         if (rec->extent_item_refs) {
7298                                 fprintf(stderr, "block %llu rec "
7299                                         "extent_item_refs %llu, passed %llu\n",
7300                                         (unsigned long long)tmpl->start,
7301                                         (unsigned long long)
7302                                                         rec->extent_item_refs,
7303                                         (unsigned long long)tmpl->extent_item_refs);
7304                         }
7305                         rec->extent_item_refs = tmpl->extent_item_refs;
7306                 }
7307                 if (tmpl->is_root)
7308                         rec->is_root = 1;
7309                 if (tmpl->content_checked)
7310                         rec->content_checked = 1;
7311                 if (tmpl->owner_ref_checked)
7312                         rec->owner_ref_checked = 1;
7313                 memcpy(&rec->parent_key, &tmpl->parent_key,
7314                                 sizeof(tmpl->parent_key));
7315                 if (tmpl->parent_generation)
7316                         rec->parent_generation = tmpl->parent_generation;
7317                 if (rec->max_size < tmpl->max_size)
7318                         rec->max_size = tmpl->max_size;
7319
7320                 /*
7321                  * A metadata extent can't cross stripe_len boundary, otherwise
7322                  * kernel scrub won't be able to handle it.
7323                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7324                  * it.
7325                  */
7326                 if (tmpl->metadata)
7327                         rec->crossing_stripes = check_crossing_stripes(
7328                                         global_info, rec->start,
7329                                         global_info->nodesize);
7330                 check_extent_type(rec);
7331                 maybe_free_extent_rec(extent_cache, rec);
7332                 return ret;
7333         }
7334
7335         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7336
7337         return ret;
7338 }
7339
7340 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7341                             u64 parent, u64 root, int found_ref)
7342 {
7343         struct extent_record *rec;
7344         struct tree_backref *back;
7345         struct cache_extent *cache;
7346         int ret;
7347         bool insert = false;
7348
7349         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7350         if (!cache) {
7351                 struct extent_record tmpl;
7352
7353                 memset(&tmpl, 0, sizeof(tmpl));
7354                 tmpl.start = bytenr;
7355                 tmpl.nr = 1;
7356                 tmpl.metadata = 1;
7357                 tmpl.max_size = 1;
7358
7359                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7360                 if (ret)
7361                         return ret;
7362
7363                 /* really a bug in cache_extent implement now */
7364                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7365                 if (!cache)
7366                         return -ENOENT;
7367         }
7368
7369         rec = container_of(cache, struct extent_record, cache);
7370         if (rec->start != bytenr) {
7371                 /*
7372                  * Several cause, from unaligned bytenr to over lapping extents
7373                  */
7374                 return -EEXIST;
7375         }
7376
7377         back = find_tree_backref(rec, parent, root);
7378         if (!back) {
7379                 back = alloc_tree_backref(rec, parent, root);
7380                 if (!back)
7381                         return -ENOMEM;
7382                 insert = true;
7383         }
7384
7385         if (found_ref) {
7386                 if (back->node.found_ref) {
7387                         fprintf(stderr, "Extent back ref already exists "
7388                                 "for %llu parent %llu root %llu \n",
7389                                 (unsigned long long)bytenr,
7390                                 (unsigned long long)parent,
7391                                 (unsigned long long)root);
7392                 }
7393                 back->node.found_ref = 1;
7394         } else {
7395                 if (back->node.found_extent_tree) {
7396                         fprintf(stderr, "Extent back ref already exists "
7397                                 "for %llu parent %llu root %llu \n",
7398                                 (unsigned long long)bytenr,
7399                                 (unsigned long long)parent,
7400                                 (unsigned long long)root);
7401                 }
7402                 back->node.found_extent_tree = 1;
7403         }
7404         if (insert)
7405                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7406                         compare_extent_backref));
7407         check_extent_type(rec);
7408         maybe_free_extent_rec(extent_cache, rec);
7409         return 0;
7410 }
7411
7412 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7413                             u64 parent, u64 root, u64 owner, u64 offset,
7414                             u32 num_refs, int found_ref, u64 max_size)
7415 {
7416         struct extent_record *rec;
7417         struct data_backref *back;
7418         struct cache_extent *cache;
7419         int ret;
7420         bool insert = false;
7421
7422         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7423         if (!cache) {
7424                 struct extent_record tmpl;
7425
7426                 memset(&tmpl, 0, sizeof(tmpl));
7427                 tmpl.start = bytenr;
7428                 tmpl.nr = 1;
7429                 tmpl.max_size = max_size;
7430
7431                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7432                 if (ret)
7433                         return ret;
7434
7435                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7436                 if (!cache)
7437                         abort();
7438         }
7439
7440         rec = container_of(cache, struct extent_record, cache);
7441         if (rec->max_size < max_size)
7442                 rec->max_size = max_size;
7443
7444         /*
7445          * If found_ref is set then max_size is the real size and must match the
7446          * existing refs.  So if we have already found a ref then we need to
7447          * make sure that this ref matches the existing one, otherwise we need
7448          * to add a new backref so we can notice that the backrefs don't match
7449          * and we need to figure out who is telling the truth.  This is to
7450          * account for that awful fsync bug I introduced where we'd end up with
7451          * a btrfs_file_extent_item that would have its length include multiple
7452          * prealloc extents or point inside of a prealloc extent.
7453          */
7454         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7455                                  bytenr, max_size);
7456         if (!back) {
7457                 back = alloc_data_backref(rec, parent, root, owner, offset,
7458                                           max_size);
7459                 BUG_ON(!back);
7460                 insert = true;
7461         }
7462
7463         if (found_ref) {
7464                 BUG_ON(num_refs != 1);
7465                 if (back->node.found_ref)
7466                         BUG_ON(back->bytes != max_size);
7467                 back->node.found_ref = 1;
7468                 back->found_ref += 1;
7469                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7470                         back->bytes = max_size;
7471                         back->disk_bytenr = bytenr;
7472
7473                         /* Need to reinsert if not already in the tree */
7474                         if (!insert) {
7475                                 rb_erase(&back->node.node, &rec->backref_tree);
7476                                 insert = true;
7477                         }
7478                 }
7479                 rec->refs += 1;
7480                 rec->content_checked = 1;
7481                 rec->owner_ref_checked = 1;
7482         } else {
7483                 if (back->node.found_extent_tree) {
7484                         fprintf(stderr, "Extent back ref already exists "
7485                                 "for %llu parent %llu root %llu "
7486                                 "owner %llu offset %llu num_refs %lu\n",
7487                                 (unsigned long long)bytenr,
7488                                 (unsigned long long)parent,
7489                                 (unsigned long long)root,
7490                                 (unsigned long long)owner,
7491                                 (unsigned long long)offset,
7492                                 (unsigned long)num_refs);
7493                 }
7494                 back->num_refs = num_refs;
7495                 back->node.found_extent_tree = 1;
7496         }
7497         if (insert)
7498                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7499                         compare_extent_backref));
7500
7501         maybe_free_extent_rec(extent_cache, rec);
7502         return 0;
7503 }
7504
7505 static int add_pending(struct cache_tree *pending,
7506                        struct cache_tree *seen, u64 bytenr, u32 size)
7507 {
7508         int ret;
7509         ret = add_cache_extent(seen, bytenr, size);
7510         if (ret)
7511                 return ret;
7512         add_cache_extent(pending, bytenr, size);
7513         return 0;
7514 }
7515
7516 static int pick_next_pending(struct cache_tree *pending,
7517                         struct cache_tree *reada,
7518                         struct cache_tree *nodes,
7519                         u64 last, struct block_info *bits, int bits_nr,
7520                         int *reada_bits)
7521 {
7522         unsigned long node_start = last;
7523         struct cache_extent *cache;
7524         int ret;
7525
7526         cache = search_cache_extent(reada, 0);
7527         if (cache) {
7528                 bits[0].start = cache->start;
7529                 bits[0].size = cache->size;
7530                 *reada_bits = 1;
7531                 return 1;
7532         }
7533         *reada_bits = 0;
7534         if (node_start > 32768)
7535                 node_start -= 32768;
7536
7537         cache = search_cache_extent(nodes, node_start);
7538         if (!cache)
7539                 cache = search_cache_extent(nodes, 0);
7540
7541         if (!cache) {
7542                  cache = search_cache_extent(pending, 0);
7543                  if (!cache)
7544                          return 0;
7545                  ret = 0;
7546                  do {
7547                          bits[ret].start = cache->start;
7548                          bits[ret].size = cache->size;
7549                          cache = next_cache_extent(cache);
7550                          ret++;
7551                  } while (cache && ret < bits_nr);
7552                  return ret;
7553         }
7554
7555         ret = 0;
7556         do {
7557                 bits[ret].start = cache->start;
7558                 bits[ret].size = cache->size;
7559                 cache = next_cache_extent(cache);
7560                 ret++;
7561         } while (cache && ret < bits_nr);
7562
7563         if (bits_nr - ret > 8) {
7564                 u64 lookup = bits[0].start + bits[0].size;
7565                 struct cache_extent *next;
7566                 next = search_cache_extent(pending, lookup);
7567                 while(next) {
7568                         if (next->start - lookup > 32768)
7569                                 break;
7570                         bits[ret].start = next->start;
7571                         bits[ret].size = next->size;
7572                         lookup = next->start + next->size;
7573                         ret++;
7574                         if (ret == bits_nr)
7575                                 break;
7576                         next = next_cache_extent(next);
7577                         if (!next)
7578                                 break;
7579                 }
7580         }
7581         return ret;
7582 }
7583
7584 static void free_chunk_record(struct cache_extent *cache)
7585 {
7586         struct chunk_record *rec;
7587
7588         rec = container_of(cache, struct chunk_record, cache);
7589         list_del_init(&rec->list);
7590         list_del_init(&rec->dextents);
7591         free(rec);
7592 }
7593
7594 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7595 {
7596         cache_tree_free_extents(chunk_cache, free_chunk_record);
7597 }
7598
7599 static void free_device_record(struct rb_node *node)
7600 {
7601         struct device_record *rec;
7602
7603         rec = container_of(node, struct device_record, node);
7604         free(rec);
7605 }
7606
7607 FREE_RB_BASED_TREE(device_cache, free_device_record);
7608
7609 int insert_block_group_record(struct block_group_tree *tree,
7610                               struct block_group_record *bg_rec)
7611 {
7612         int ret;
7613
7614         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7615         if (ret)
7616                 return ret;
7617
7618         list_add_tail(&bg_rec->list, &tree->block_groups);
7619         return 0;
7620 }
7621
7622 static void free_block_group_record(struct cache_extent *cache)
7623 {
7624         struct block_group_record *rec;
7625
7626         rec = container_of(cache, struct block_group_record, cache);
7627         list_del_init(&rec->list);
7628         free(rec);
7629 }
7630
7631 void free_block_group_tree(struct block_group_tree *tree)
7632 {
7633         cache_tree_free_extents(&tree->tree, free_block_group_record);
7634 }
7635
7636 int insert_device_extent_record(struct device_extent_tree *tree,
7637                                 struct device_extent_record *de_rec)
7638 {
7639         int ret;
7640
7641         /*
7642          * Device extent is a bit different from the other extents, because
7643          * the extents which belong to the different devices may have the
7644          * same start and size, so we need use the special extent cache
7645          * search/insert functions.
7646          */
7647         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7648         if (ret)
7649                 return ret;
7650
7651         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7652         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7653         return 0;
7654 }
7655
7656 static void free_device_extent_record(struct cache_extent *cache)
7657 {
7658         struct device_extent_record *rec;
7659
7660         rec = container_of(cache, struct device_extent_record, cache);
7661         if (!list_empty(&rec->chunk_list))
7662                 list_del_init(&rec->chunk_list);
7663         if (!list_empty(&rec->device_list))
7664                 list_del_init(&rec->device_list);
7665         free(rec);
7666 }
7667
7668 void free_device_extent_tree(struct device_extent_tree *tree)
7669 {
7670         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7671 }
7672
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn a v0 extent ref item at @slot of @leaf into a backref.
 *
 * The item key supplies the extent bytenr (objectid) and the parent
 * (offset).  A ref objectid below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref, anything else as a data backref carrying the v0 ref count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID)
		return add_tree_backref(extent_cache, key.objectid,
					key.offset, 0, 0);

	return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);
}
#endif
7693
7694 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7695                                             struct btrfs_key *key,
7696                                             int slot)
7697 {
7698         struct btrfs_chunk *ptr;
7699         struct chunk_record *rec;
7700         int num_stripes, i;
7701
7702         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7703         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7704
7705         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7706         if (!rec) {
7707                 fprintf(stderr, "memory allocation failed\n");
7708                 exit(-1);
7709         }
7710
7711         INIT_LIST_HEAD(&rec->list);
7712         INIT_LIST_HEAD(&rec->dextents);
7713         rec->bg_rec = NULL;
7714
7715         rec->cache.start = key->offset;
7716         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7717
7718         rec->generation = btrfs_header_generation(leaf);
7719
7720         rec->objectid = key->objectid;
7721         rec->type = key->type;
7722         rec->offset = key->offset;
7723
7724         rec->length = rec->cache.size;
7725         rec->owner = btrfs_chunk_owner(leaf, ptr);
7726         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7727         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7728         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7729         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7730         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7731         rec->num_stripes = num_stripes;
7732         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7733
7734         for (i = 0; i < rec->num_stripes; ++i) {
7735                 rec->stripes[i].devid =
7736                         btrfs_stripe_devid_nr(leaf, ptr, i);
7737                 rec->stripes[i].offset =
7738                         btrfs_stripe_offset_nr(leaf, ptr, i);
7739                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7740                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7741                                 BTRFS_UUID_SIZE);
7742         }
7743
7744         return rec;
7745 }
7746
7747 static int process_chunk_item(struct cache_tree *chunk_cache,
7748                               struct btrfs_key *key, struct extent_buffer *eb,
7749                               int slot)
7750 {
7751         struct chunk_record *rec;
7752         struct btrfs_chunk *chunk;
7753         int ret = 0;
7754
7755         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7756         /*
7757          * Do extra check for this chunk item,
7758          *
7759          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7760          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7761          * and owner<->key_type check.
7762          */
7763         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7764                                       key->offset);
7765         if (ret < 0) {
7766                 error("chunk(%llu, %llu) is not valid, ignore it",
7767                       key->offset, btrfs_chunk_length(eb, chunk));
7768                 return 0;
7769         }
7770         rec = btrfs_new_chunk_record(eb, key, slot);
7771         ret = insert_cache_extent(chunk_cache, &rec->cache);
7772         if (ret) {
7773                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7774                         rec->offset, rec->length);
7775                 free(rec);
7776         }
7777
7778         return ret;
7779 }
7780
7781 static int process_device_item(struct rb_root *dev_cache,
7782                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7783 {
7784         struct btrfs_dev_item *ptr;
7785         struct device_record *rec;
7786         int ret = 0;
7787
7788         ptr = btrfs_item_ptr(eb,
7789                 slot, struct btrfs_dev_item);
7790
7791         rec = malloc(sizeof(*rec));
7792         if (!rec) {
7793                 fprintf(stderr, "memory allocation failed\n");
7794                 return -ENOMEM;
7795         }
7796
7797         rec->devid = key->offset;
7798         rec->generation = btrfs_header_generation(eb);
7799
7800         rec->objectid = key->objectid;
7801         rec->type = key->type;
7802         rec->offset = key->offset;
7803
7804         rec->devid = btrfs_device_id(eb, ptr);
7805         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7806         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7807
7808         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7809         if (ret) {
7810                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7811                 free(rec);
7812         }
7813
7814         return ret;
7815 }
7816
7817 struct block_group_record *
7818 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7819                              int slot)
7820 {
7821         struct btrfs_block_group_item *ptr;
7822         struct block_group_record *rec;
7823
7824         rec = calloc(1, sizeof(*rec));
7825         if (!rec) {
7826                 fprintf(stderr, "memory allocation failed\n");
7827                 exit(-1);
7828         }
7829
7830         rec->cache.start = key->objectid;
7831         rec->cache.size = key->offset;
7832
7833         rec->generation = btrfs_header_generation(leaf);
7834
7835         rec->objectid = key->objectid;
7836         rec->type = key->type;
7837         rec->offset = key->offset;
7838
7839         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7840         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7841
7842         INIT_LIST_HEAD(&rec->list);
7843
7844         return rec;
7845 }
7846
7847 static int process_block_group_item(struct block_group_tree *block_group_cache,
7848                                     struct btrfs_key *key,
7849                                     struct extent_buffer *eb, int slot)
7850 {
7851         struct block_group_record *rec;
7852         int ret = 0;
7853
7854         rec = btrfs_new_block_group_record(eb, key, slot);
7855         ret = insert_block_group_record(block_group_cache, rec);
7856         if (ret) {
7857                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7858                         rec->objectid, rec->offset);
7859                 free(rec);
7860         }
7861
7862         return ret;
7863 }
7864
7865 struct device_extent_record *
7866 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7867                                struct btrfs_key *key, int slot)
7868 {
7869         struct device_extent_record *rec;
7870         struct btrfs_dev_extent *ptr;
7871
7872         rec = calloc(1, sizeof(*rec));
7873         if (!rec) {
7874                 fprintf(stderr, "memory allocation failed\n");
7875                 exit(-1);
7876         }
7877
7878         rec->cache.objectid = key->objectid;
7879         rec->cache.start = key->offset;
7880
7881         rec->generation = btrfs_header_generation(leaf);
7882
7883         rec->objectid = key->objectid;
7884         rec->type = key->type;
7885         rec->offset = key->offset;
7886
7887         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7888         rec->chunk_objecteid =
7889                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7890         rec->chunk_offset =
7891                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7892         rec->length = btrfs_dev_extent_length(leaf, ptr);
7893         rec->cache.size = rec->length;
7894
7895         INIT_LIST_HEAD(&rec->chunk_list);
7896         INIT_LIST_HEAD(&rec->device_list);
7897
7898         return rec;
7899 }
7900
7901 static int
7902 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7903                            struct btrfs_key *key, struct extent_buffer *eb,
7904                            int slot)
7905 {
7906         struct device_extent_record *rec;
7907         int ret;
7908
7909         rec = btrfs_new_device_extent_record(eb, key, slot);
7910         ret = insert_device_extent_record(dev_extent_cache, rec);
7911         if (ret) {
7912                 fprintf(stderr,
7913                         "Device extent[%llu, %llu, %llu] existed.\n",
7914                         rec->objectid, rec->offset, rec->length);
7915                 free(rec);
7916         }
7917
7918         return ret;
7919 }
7920
7921 static int process_extent_item(struct btrfs_root *root,
7922                                struct cache_tree *extent_cache,
7923                                struct extent_buffer *eb, int slot)
7924 {
7925         struct btrfs_extent_item *ei;
7926         struct btrfs_extent_inline_ref *iref;
7927         struct btrfs_extent_data_ref *dref;
7928         struct btrfs_shared_data_ref *sref;
7929         struct btrfs_key key;
7930         struct extent_record tmpl;
7931         unsigned long end;
7932         unsigned long ptr;
7933         int ret;
7934         int type;
7935         u32 item_size = btrfs_item_size_nr(eb, slot);
7936         u64 refs = 0;
7937         u64 offset;
7938         u64 num_bytes;
7939         int metadata = 0;
7940
7941         btrfs_item_key_to_cpu(eb, &key, slot);
7942
7943         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7944                 metadata = 1;
7945                 num_bytes = root->fs_info->nodesize;
7946         } else {
7947                 num_bytes = key.offset;
7948         }
7949
7950         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7951                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7952                       key.objectid, root->fs_info->sectorsize);
7953                 return -EIO;
7954         }
7955         if (item_size < sizeof(*ei)) {
7956 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7957                 struct btrfs_extent_item_v0 *ei0;
7958                 if (item_size != sizeof(*ei0)) {
7959                         error(
7960         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
7961                                 key.objectid, key.type, key.offset,
7962                                 btrfs_header_bytenr(eb), slot);
7963                         BUG();
7964                 }
7965                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7966                 refs = btrfs_extent_refs_v0(eb, ei0);
7967 #else
7968                 BUG();
7969 #endif
7970                 memset(&tmpl, 0, sizeof(tmpl));
7971                 tmpl.start = key.objectid;
7972                 tmpl.nr = num_bytes;
7973                 tmpl.extent_item_refs = refs;
7974                 tmpl.metadata = metadata;
7975                 tmpl.found_rec = 1;
7976                 tmpl.max_size = num_bytes;
7977
7978                 return add_extent_rec(extent_cache, &tmpl);
7979         }
7980
7981         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7982         refs = btrfs_extent_refs(eb, ei);
7983         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7984                 metadata = 1;
7985         else
7986                 metadata = 0;
7987         if (metadata && num_bytes != root->fs_info->nodesize) {
7988                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7989                       num_bytes, root->fs_info->nodesize);
7990                 return -EIO;
7991         }
7992         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7993                 error("ignore invalid data extent, length %llu is not aligned to %u",
7994                       num_bytes, root->fs_info->sectorsize);
7995                 return -EIO;
7996         }
7997
7998         memset(&tmpl, 0, sizeof(tmpl));
7999         tmpl.start = key.objectid;
8000         tmpl.nr = num_bytes;
8001         tmpl.extent_item_refs = refs;
8002         tmpl.metadata = metadata;
8003         tmpl.found_rec = 1;
8004         tmpl.max_size = num_bytes;
8005         add_extent_rec(extent_cache, &tmpl);
8006
8007         ptr = (unsigned long)(ei + 1);
8008         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8009             key.type == BTRFS_EXTENT_ITEM_KEY)
8010                 ptr += sizeof(struct btrfs_tree_block_info);
8011
8012         end = (unsigned long)ei + item_size;
8013         while (ptr < end) {
8014                 iref = (struct btrfs_extent_inline_ref *)ptr;
8015                 type = btrfs_extent_inline_ref_type(eb, iref);
8016                 offset = btrfs_extent_inline_ref_offset(eb, iref);
8017                 switch (type) {
8018                 case BTRFS_TREE_BLOCK_REF_KEY:
8019                         ret = add_tree_backref(extent_cache, key.objectid,
8020                                         0, offset, 0);
8021                         if (ret < 0)
8022                                 error(
8023                         "add_tree_backref failed (extent items tree block): %s",
8024                                       strerror(-ret));
8025                         break;
8026                 case BTRFS_SHARED_BLOCK_REF_KEY:
8027                         ret = add_tree_backref(extent_cache, key.objectid,
8028                                         offset, 0, 0);
8029                         if (ret < 0)
8030                                 error(
8031                         "add_tree_backref failed (extent items shared block): %s",
8032                                       strerror(-ret));
8033                         break;
8034                 case BTRFS_EXTENT_DATA_REF_KEY:
8035                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8036                         add_data_backref(extent_cache, key.objectid, 0,
8037                                         btrfs_extent_data_ref_root(eb, dref),
8038                                         btrfs_extent_data_ref_objectid(eb,
8039                                                                        dref),
8040                                         btrfs_extent_data_ref_offset(eb, dref),
8041                                         btrfs_extent_data_ref_count(eb, dref),
8042                                         0, num_bytes);
8043                         break;
8044                 case BTRFS_SHARED_DATA_REF_KEY:
8045                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8046                         add_data_backref(extent_cache, key.objectid, offset,
8047                                         0, 0, 0,
8048                                         btrfs_shared_data_ref_count(eb, sref),
8049                                         0, num_bytes);
8050                         break;
8051                 default:
8052                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8053                                 key.objectid, key.type, num_bytes);
8054                         goto out;
8055                 }
8056                 ptr += btrfs_extent_inline_ref_size(type);
8057         }
8058         WARN_ON(ptr > end);
8059 out:
8060         return 0;
8061 }
8062
8063 static int check_cache_range(struct btrfs_root *root,
8064                              struct btrfs_block_group_cache *cache,
8065                              u64 offset, u64 bytes)
8066 {
8067         struct btrfs_free_space *entry;
8068         u64 *logical;
8069         u64 bytenr;
8070         int stripe_len;
8071         int i, nr, ret;
8072
8073         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8074                 bytenr = btrfs_sb_offset(i);
8075                 ret = btrfs_rmap_block(root->fs_info,
8076                                        cache->key.objectid, bytenr, 0,
8077                                        &logical, &nr, &stripe_len);
8078                 if (ret)
8079                         return ret;
8080
8081                 while (nr--) {
8082                         if (logical[nr] + stripe_len <= offset)
8083                                 continue;
8084                         if (offset + bytes <= logical[nr])
8085                                 continue;
8086                         if (logical[nr] == offset) {
8087                                 if (stripe_len >= bytes) {
8088                                         free(logical);
8089                                         return 0;
8090                                 }
8091                                 bytes -= stripe_len;
8092                                 offset += stripe_len;
8093                         } else if (logical[nr] < offset) {
8094                                 if (logical[nr] + stripe_len >=
8095                                     offset + bytes) {
8096                                         free(logical);
8097                                         return 0;
8098                                 }
8099                                 bytes = (offset + bytes) -
8100                                         (logical[nr] + stripe_len);
8101                                 offset = logical[nr] + stripe_len;
8102                         } else {
8103                                 /*
8104                                  * Could be tricky, the super may land in the
8105                                  * middle of the area we're checking.  First
8106                                  * check the easiest case, it's at the end.
8107                                  */
8108                                 if (logical[nr] + stripe_len >=
8109                                     bytes + offset) {
8110                                         bytes = logical[nr] - offset;
8111                                         continue;
8112                                 }
8113
8114                                 /* Check the left side */
8115                                 ret = check_cache_range(root, cache,
8116                                                         offset,
8117                                                         logical[nr] - offset);
8118                                 if (ret) {
8119                                         free(logical);
8120                                         return ret;
8121                                 }
8122
8123                                 /* Now we continue with the right side */
8124                                 bytes = (offset + bytes) -
8125                                         (logical[nr] + stripe_len);
8126                                 offset = logical[nr] + stripe_len;
8127                         }
8128                 }
8129
8130                 free(logical);
8131         }
8132
8133         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8134         if (!entry) {
8135                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8136                         offset, offset+bytes);
8137                 return -EINVAL;
8138         }
8139
8140         if (entry->offset != offset) {
8141                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8142                         entry->offset);
8143                 return -EINVAL;
8144         }
8145
8146         if (entry->bytes != bytes) {
8147                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8148                         bytes, entry->bytes, offset);
8149                 return -EINVAL;
8150         }
8151
8152         unlink_free_space(cache->free_space_ctl, entry);
8153         free(entry);
8154         return 0;
8155 }
8156
8157 static int verify_space_cache(struct btrfs_root *root,
8158                               struct btrfs_block_group_cache *cache)
8159 {
8160         struct btrfs_path path;
8161         struct extent_buffer *leaf;
8162         struct btrfs_key key;
8163         u64 last;
8164         int ret = 0;
8165
8166         root = root->fs_info->extent_root;
8167
8168         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8169
8170         btrfs_init_path(&path);
8171         key.objectid = last;
8172         key.offset = 0;
8173         key.type = BTRFS_EXTENT_ITEM_KEY;
8174         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8175         if (ret < 0)
8176                 goto out;
8177         ret = 0;
8178         while (1) {
8179                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8180                         ret = btrfs_next_leaf(root, &path);
8181                         if (ret < 0)
8182                                 goto out;
8183                         if (ret > 0) {
8184                                 ret = 0;
8185                                 break;
8186                         }
8187                 }
8188                 leaf = path.nodes[0];
8189                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8190                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8191                         break;
8192                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8193                     key.type != BTRFS_METADATA_ITEM_KEY) {
8194                         path.slots[0]++;
8195                         continue;
8196                 }
8197
8198                 if (last == key.objectid) {
8199                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8200                                 last = key.objectid + key.offset;
8201                         else
8202                                 last = key.objectid + root->fs_info->nodesize;
8203                         path.slots[0]++;
8204                         continue;
8205                 }
8206
8207                 ret = check_cache_range(root, cache, last,
8208                                         key.objectid - last);
8209                 if (ret)
8210                         break;
8211                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8212                         last = key.objectid + key.offset;
8213                 else
8214                         last = key.objectid + root->fs_info->nodesize;
8215                 path.slots[0]++;
8216         }
8217
8218         if (last < cache->key.objectid + cache->key.offset)
8219                 ret = check_cache_range(root, cache, last,
8220                                         cache->key.objectid +
8221                                         cache->key.offset - last);
8222
8223 out:
8224         btrfs_release_path(&path);
8225
8226         if (!ret &&
8227             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8228                 fprintf(stderr, "There are still entries left in the space "
8229                         "cache\n");
8230                 ret = -EINVAL;
8231         }
8232
8233         return ret;
8234 }
8235
8236 static int check_space_cache(struct btrfs_root *root)
8237 {
8238         struct btrfs_block_group_cache *cache;
8239         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8240         int ret;
8241         int error = 0;
8242
8243         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8244             btrfs_super_generation(root->fs_info->super_copy) !=
8245             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8246                 printf("cache and super generation don't match, space cache "
8247                        "will be invalidated\n");
8248                 return 0;
8249         }
8250
8251         if (ctx.progress_enabled) {
8252                 ctx.tp = TASK_FREE_SPACE;
8253                 task_start(ctx.info);
8254         }
8255
8256         while (1) {
8257                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8258                 if (!cache)
8259                         break;
8260
8261                 start = cache->key.objectid + cache->key.offset;
8262                 if (!cache->free_space_ctl) {
8263                         if (btrfs_init_free_space_ctl(cache,
8264                                                 root->fs_info->sectorsize)) {
8265                                 ret = -ENOMEM;
8266                                 break;
8267                         }
8268                 } else {
8269                         btrfs_remove_free_space_cache(cache);
8270                 }
8271
8272                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8273                         ret = exclude_super_stripes(root, cache);
8274                         if (ret) {
8275                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8276                                         strerror(-ret));
8277                                 error++;
8278                                 continue;
8279                         }
8280                         ret = load_free_space_tree(root->fs_info, cache);
8281                         free_excluded_extents(root, cache);
8282                         if (ret < 0) {
8283                                 fprintf(stderr, "could not load free space tree: %s\n",
8284                                         strerror(-ret));
8285                                 error++;
8286                                 continue;
8287                         }
8288                         error += ret;
8289                 } else {
8290                         ret = load_free_space_cache(root->fs_info, cache);
8291                         if (!ret)
8292                                 continue;
8293                 }
8294
8295                 ret = verify_space_cache(root, cache);
8296                 if (ret) {
8297                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8298                                 cache->key.objectid);
8299                         error++;
8300                 }
8301         }
8302
8303         task_stop(ctx.info);
8304
8305         return error ? -EINVAL : 0;
8306 }
8307
8308 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8309                         u64 num_bytes, unsigned long leaf_offset,
8310                         struct extent_buffer *eb) {
8311
8312         struct btrfs_fs_info *fs_info = root->fs_info;
8313         u64 offset = 0;
8314         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8315         char *data;
8316         unsigned long csum_offset;
8317         u32 csum;
8318         u32 csum_expected;
8319         u64 read_len;
8320         u64 data_checked = 0;
8321         u64 tmp;
8322         int ret = 0;
8323         int mirror;
8324         int num_copies;
8325
8326         if (num_bytes % fs_info->sectorsize)
8327                 return -EINVAL;
8328
8329         data = malloc(num_bytes);
8330         if (!data)
8331                 return -ENOMEM;
8332
8333         while (offset < num_bytes) {
8334                 mirror = 0;
8335 again:
8336                 read_len = num_bytes - offset;
8337                 /* read as much space once a time */
8338                 ret = read_extent_data(fs_info, data + offset,
8339                                 bytenr + offset, &read_len, mirror);
8340                 if (ret)
8341                         goto out;
8342                 data_checked = 0;
8343                 /* verify every 4k data's checksum */
8344                 while (data_checked < read_len) {
8345                         csum = ~(u32)0;
8346                         tmp = offset + data_checked;
8347
8348                         csum = btrfs_csum_data((char *)data + tmp,
8349                                                csum, fs_info->sectorsize);
8350                         btrfs_csum_final(csum, (u8 *)&csum);
8351
8352                         csum_offset = leaf_offset +
8353                                  tmp / fs_info->sectorsize * csum_size;
8354                         read_extent_buffer(eb, (char *)&csum_expected,
8355                                            csum_offset, csum_size);
8356                         /* try another mirror */
8357                         if (csum != csum_expected) {
8358                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8359                                                 mirror, bytenr + tmp,
8360                                                 csum, csum_expected);
8361                                 num_copies = btrfs_num_copies(root->fs_info,
8362                                                 bytenr, num_bytes);
8363                                 if (mirror < num_copies - 1) {
8364                                         mirror += 1;
8365                                         goto again;
8366                                 }
8367                         }
8368                         data_checked += fs_info->sectorsize;
8369                 }
8370                 offset += read_len;
8371         }
8372 out:
8373         free(data);
8374         return ret;
8375 }
8376
8377 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8378                                u64 num_bytes)
8379 {
8380         struct btrfs_path path;
8381         struct extent_buffer *leaf;
8382         struct btrfs_key key;
8383         int ret;
8384
8385         btrfs_init_path(&path);
8386         key.objectid = bytenr;
8387         key.type = BTRFS_EXTENT_ITEM_KEY;
8388         key.offset = (u64)-1;
8389
8390 again:
8391         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8392                                 0, 0);
8393         if (ret < 0) {
8394                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8395                 btrfs_release_path(&path);
8396                 return ret;
8397         } else if (ret) {
8398                 if (path.slots[0] > 0) {
8399                         path.slots[0]--;
8400                 } else {
8401                         ret = btrfs_prev_leaf(root, &path);
8402                         if (ret < 0) {
8403                                 goto out;
8404                         } else if (ret > 0) {
8405                                 ret = 0;
8406                                 goto out;
8407                         }
8408                 }
8409         }
8410
8411         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8412
8413         /*
8414          * Block group items come before extent items if they have the same
8415          * bytenr, so walk back one more just in case.  Dear future traveller,
8416          * first congrats on mastering time travel.  Now if it's not too much
8417          * trouble could you go back to 2006 and tell Chris to make the
8418          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8419          * EXTENT_ITEM_KEY please?
8420          */
8421         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8422                 if (path.slots[0] > 0) {
8423                         path.slots[0]--;
8424                 } else {
8425                         ret = btrfs_prev_leaf(root, &path);
8426                         if (ret < 0) {
8427                                 goto out;
8428                         } else if (ret > 0) {
8429                                 ret = 0;
8430                                 goto out;
8431                         }
8432                 }
8433                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8434         }
8435
8436         while (num_bytes) {
8437                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8438                         ret = btrfs_next_leaf(root, &path);
8439                         if (ret < 0) {
8440                                 fprintf(stderr, "Error going to next leaf "
8441                                         "%d\n", ret);
8442                                 btrfs_release_path(&path);
8443                                 return ret;
8444                         } else if (ret) {
8445                                 break;
8446                         }
8447                 }
8448                 leaf = path.nodes[0];
8449                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8450                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8451                         path.slots[0]++;
8452                         continue;
8453                 }
8454                 if (key.objectid + key.offset < bytenr) {
8455                         path.slots[0]++;
8456                         continue;
8457                 }
8458                 if (key.objectid > bytenr + num_bytes)
8459                         break;
8460
8461                 if (key.objectid == bytenr) {
8462                         if (key.offset >= num_bytes) {
8463                                 num_bytes = 0;
8464                                 break;
8465                         }
8466                         num_bytes -= key.offset;
8467                         bytenr += key.offset;
8468                 } else if (key.objectid < bytenr) {
8469                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8470                                 num_bytes = 0;
8471                                 break;
8472                         }
8473                         num_bytes = (bytenr + num_bytes) -
8474                                 (key.objectid + key.offset);
8475                         bytenr = key.objectid + key.offset;
8476                 } else {
8477                         if (key.objectid + key.offset < bytenr + num_bytes) {
8478                                 u64 new_start = key.objectid + key.offset;
8479                                 u64 new_bytes = bytenr + num_bytes - new_start;
8480
8481                                 /*
8482                                  * Weird case, the extent is in the middle of
8483                                  * our range, we'll have to search one side
8484                                  * and then the other.  Not sure if this happens
8485                                  * in real life, but no harm in coding it up
8486                                  * anyway just in case.
8487                                  */
8488                                 btrfs_release_path(&path);
8489                                 ret = check_extent_exists(root, new_start,
8490                                                           new_bytes);
8491                                 if (ret) {
8492                                         fprintf(stderr, "Right section didn't "
8493                                                 "have a record\n");
8494                                         break;
8495                                 }
8496                                 num_bytes = key.objectid - bytenr;
8497                                 goto again;
8498                         }
8499                         num_bytes = key.objectid - bytenr;
8500                 }
8501                 path.slots[0]++;
8502         }
8503         ret = 0;
8504
8505 out:
8506         if (num_bytes && !ret) {
8507                 fprintf(stderr, "There are no extents for csum range "
8508                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8509                 ret = 1;
8510         }
8511
8512         btrfs_release_path(&path);
8513         return ret;
8514 }
8515
8516 static int check_csums(struct btrfs_root *root)
8517 {
8518         struct btrfs_path path;
8519         struct extent_buffer *leaf;
8520         struct btrfs_key key;
8521         u64 offset = 0, num_bytes = 0;
8522         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8523         int errors = 0;
8524         int ret;
8525         u64 data_len;
8526         unsigned long leaf_offset;
8527
8528         root = root->fs_info->csum_root;
8529         if (!extent_buffer_uptodate(root->node)) {
8530                 fprintf(stderr, "No valid csum tree found\n");
8531                 return -ENOENT;
8532         }
8533
8534         btrfs_init_path(&path);
8535         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8536         key.type = BTRFS_EXTENT_CSUM_KEY;
8537         key.offset = 0;
8538         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8539         if (ret < 0) {
8540                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8541                 btrfs_release_path(&path);
8542                 return ret;
8543         }
8544
8545         if (ret > 0 && path.slots[0])
8546                 path.slots[0]--;
8547         ret = 0;
8548
8549         while (1) {
8550                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8551                         ret = btrfs_next_leaf(root, &path);
8552                         if (ret < 0) {
8553                                 fprintf(stderr, "Error going to next leaf "
8554                                         "%d\n", ret);
8555                                 break;
8556                         }
8557                         if (ret)
8558                                 break;
8559                 }
8560                 leaf = path.nodes[0];
8561
8562                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8563                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8564                         path.slots[0]++;
8565                         continue;
8566                 }
8567
8568                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8569                               csum_size) * root->fs_info->sectorsize;
8570                 if (!check_data_csum)
8571                         goto skip_csum_check;
8572                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8573                 ret = check_extent_csums(root, key.offset, data_len,
8574                                          leaf_offset, leaf);
8575                 if (ret)
8576                         break;
8577 skip_csum_check:
8578                 if (!num_bytes) {
8579                         offset = key.offset;
8580                 } else if (key.offset != offset + num_bytes) {
8581                         ret = check_extent_exists(root, offset, num_bytes);
8582                         if (ret) {
8583                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8584                                         "there is no extent record\n",
8585                                         offset, offset+num_bytes);
8586                                 errors++;
8587                         }
8588                         offset = key.offset;
8589                         num_bytes = 0;
8590                 }
8591                 num_bytes += data_len;
8592                 path.slots[0]++;
8593         }
8594
8595         btrfs_release_path(&path);
8596         return errors;
8597 }
8598
8599 static int is_dropped_key(struct btrfs_key *key,
8600                           struct btrfs_key *drop_key) {
8601         if (key->objectid < drop_key->objectid)
8602                 return 1;
8603         else if (key->objectid == drop_key->objectid) {
8604                 if (key->type < drop_key->type)
8605                         return 1;
8606                 else if (key->type == drop_key->type) {
8607                         if (key->offset < drop_key->offset)
8608                                 return 1;
8609                 }
8610         }
8611         return 0;
8612 }
8613
8614 /*
8615  * Here are the rules for FULL_BACKREF.
8616  *
8617  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8618  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8619  *      FULL_BACKREF set.
8620  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8621  *    if it happened after the relocation occurred since we'll have dropped the
8622  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8623  *    have no real way to know for sure.
8624  *
8625  * We process the blocks one root at a time, and we start from the lowest root
8626  * objectid and go to the highest.  So we can just lookup the owner backref for
8627  * the record and if we don't find it then we know it doesn't exist and we have
8628  * a FULL BACKREF.
8629  *
8630  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8631  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8632  * be set or not and then we can check later once we've gathered all the refs.
8633  */
8634 static int calc_extent_flag(struct cache_tree *extent_cache,
8635                            struct extent_buffer *buf,
8636                            struct root_item_record *ri,
8637                            u64 *flags)
8638 {
8639         struct extent_record *rec;
8640         struct cache_extent *cache;
8641         struct tree_backref *tback;
8642         u64 owner = 0;
8643
8644         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8645         /* we have added this extent before */
8646         if (!cache)
8647                 return -ENOENT;
8648
8649         rec = container_of(cache, struct extent_record, cache);
8650
8651         /*
8652          * Except file/reloc tree, we can not have
8653          * FULL BACKREF MODE
8654          */
8655         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8656                 goto normal;
8657         /*
8658          * root node
8659          */
8660         if (buf->start == ri->bytenr)
8661                 goto normal;
8662
8663         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8664                 goto full_backref;
8665
8666         owner = btrfs_header_owner(buf);
8667         if (owner == ri->objectid)
8668                 goto normal;
8669
8670         tback = find_tree_backref(rec, 0, owner);
8671         if (!tback)
8672                 goto full_backref;
8673 normal:
8674         *flags = 0;
8675         if (rec->flag_block_full_backref != FLAG_UNSET &&
8676             rec->flag_block_full_backref != 0)
8677                 rec->bad_full_backref = 1;
8678         return 0;
8679 full_backref:
8680         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8681         if (rec->flag_block_full_backref != FLAG_UNSET &&
8682             rec->flag_block_full_backref != 1)
8683                 rec->bad_full_backref = 1;
8684         return 0;
8685 }
8686
8687 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8688 {
8689         fprintf(stderr, "Invalid key type(");
8690         print_key_type(stderr, 0, key_type);
8691         fprintf(stderr, ") found in root(");
8692         print_objectid(stderr, rootid, 0);
8693         fprintf(stderr, ")\n");
8694 }
8695
8696 /*
8697  * Check if the key is valid with its extent buffer.
8698  *
8699  * This is a early check in case invalid key exists in a extent buffer
8700  * This is not comprehensive yet, but should prevent wrong key/item passed
8701  * further
8702  */
8703 static int check_type_with_root(u64 rootid, u8 key_type)
8704 {
8705         switch (key_type) {
8706         /* Only valid in chunk tree */
8707         case BTRFS_DEV_ITEM_KEY:
8708         case BTRFS_CHUNK_ITEM_KEY:
8709                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8710                         goto err;
8711                 break;
8712         /* valid in csum and log tree */
8713         case BTRFS_CSUM_TREE_OBJECTID:
8714                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8715                       is_fstree(rootid)))
8716                         goto err;
8717                 break;
8718         case BTRFS_EXTENT_ITEM_KEY:
8719         case BTRFS_METADATA_ITEM_KEY:
8720         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8721                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8722                         goto err;
8723                 break;
8724         case BTRFS_ROOT_ITEM_KEY:
8725                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8726                         goto err;
8727                 break;
8728         case BTRFS_DEV_EXTENT_KEY:
8729                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8730                         goto err;
8731                 break;
8732         }
8733         return 0;
8734 err:
8735         report_mismatch_key_root(key_type, rootid);
8736         return -EINVAL;
8737 }
8738
8739 static int run_next_block(struct btrfs_root *root,
8740                           struct block_info *bits,
8741                           int bits_nr,
8742                           u64 *last,
8743                           struct cache_tree *pending,
8744                           struct cache_tree *seen,
8745                           struct cache_tree *reada,
8746                           struct cache_tree *nodes,
8747                           struct cache_tree *extent_cache,
8748                           struct cache_tree *chunk_cache,
8749                           struct rb_root *dev_cache,
8750                           struct block_group_tree *block_group_cache,
8751                           struct device_extent_tree *dev_extent_cache,
8752                           struct root_item_record *ri)
8753 {
8754         struct btrfs_fs_info *fs_info = root->fs_info;
8755         struct extent_buffer *buf;
8756         struct extent_record *rec = NULL;
8757         u64 bytenr;
8758         u32 size;
8759         u64 parent;
8760         u64 owner;
8761         u64 flags;
8762         u64 ptr;
8763         u64 gen = 0;
8764         int ret = 0;
8765         int i;
8766         int nritems;
8767         struct btrfs_key key;
8768         struct cache_extent *cache;
8769         int reada_bits;
8770
8771         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8772                                     bits_nr, &reada_bits);
8773         if (nritems == 0)
8774                 return 1;
8775
8776         if (!reada_bits) {
8777                 for(i = 0; i < nritems; i++) {
8778                         ret = add_cache_extent(reada, bits[i].start,
8779                                                bits[i].size);
8780                         if (ret == -EEXIST)
8781                                 continue;
8782
8783                         /* fixme, get the parent transid */
8784                         readahead_tree_block(fs_info, bits[i].start, 0);
8785                 }
8786         }
8787         *last = bits[0].start;
8788         bytenr = bits[0].start;
8789         size = bits[0].size;
8790
8791         cache = lookup_cache_extent(pending, bytenr, size);
8792         if (cache) {
8793                 remove_cache_extent(pending, cache);
8794                 free(cache);
8795         }
8796         cache = lookup_cache_extent(reada, bytenr, size);
8797         if (cache) {
8798                 remove_cache_extent(reada, cache);
8799                 free(cache);
8800         }
8801         cache = lookup_cache_extent(nodes, bytenr, size);
8802         if (cache) {
8803                 remove_cache_extent(nodes, cache);
8804                 free(cache);
8805         }
8806         cache = lookup_cache_extent(extent_cache, bytenr, size);
8807         if (cache) {
8808                 rec = container_of(cache, struct extent_record, cache);
8809                 gen = rec->parent_generation;
8810         }
8811
8812         /* fixme, get the real parent transid */
8813         buf = read_tree_block(root->fs_info, bytenr, gen);
8814         if (!extent_buffer_uptodate(buf)) {
8815                 record_bad_block_io(root->fs_info,
8816                                     extent_cache, bytenr, size);
8817                 goto out;
8818         }
8819
8820         nritems = btrfs_header_nritems(buf);
8821
8822         flags = 0;
8823         if (!init_extent_tree) {
8824                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8825                                        btrfs_header_level(buf), 1, NULL,
8826                                        &flags);
8827                 if (ret < 0) {
8828                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8829                         if (ret < 0) {
8830                                 fprintf(stderr, "Couldn't calc extent flags\n");
8831                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8832                         }
8833                 }
8834         } else {
8835                 flags = 0;
8836                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8837                 if (ret < 0) {
8838                         fprintf(stderr, "Couldn't calc extent flags\n");
8839                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8840                 }
8841         }
8842
8843         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8844                 if (ri != NULL &&
8845                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8846                     ri->objectid == btrfs_header_owner(buf)) {
8847                         /*
8848                          * Ok we got to this block from it's original owner and
8849                          * we have FULL_BACKREF set.  Relocation can leave
8850                          * converted blocks over so this is altogether possible,
8851                          * however it's not possible if the generation > the
8852                          * last snapshot, so check for this case.
8853                          */
8854                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8855                             btrfs_header_generation(buf) > ri->last_snapshot) {
8856                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8857                                 rec->bad_full_backref = 1;
8858                         }
8859                 }
8860         } else {
8861                 if (ri != NULL &&
8862                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8863                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8864                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8865                         rec->bad_full_backref = 1;
8866                 }
8867         }
8868
8869         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8870                 rec->flag_block_full_backref = 1;
8871                 parent = bytenr;
8872                 owner = 0;
8873         } else {
8874                 rec->flag_block_full_backref = 0;
8875                 parent = 0;
8876                 owner = btrfs_header_owner(buf);
8877         }
8878
8879         ret = check_block(root, extent_cache, buf, flags);
8880         if (ret)
8881                 goto out;
8882
8883         if (btrfs_is_leaf(buf)) {
8884                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8885                 for (i = 0; i < nritems; i++) {
8886                         struct btrfs_file_extent_item *fi;
8887                         btrfs_item_key_to_cpu(buf, &key, i);
8888                         /*
8889                          * Check key type against the leaf owner.
8890                          * Could filter quite a lot of early error if
8891                          * owner is correct
8892                          */
8893                         if (check_type_with_root(btrfs_header_owner(buf),
8894                                                  key.type)) {
8895                                 fprintf(stderr, "ignoring invalid key\n");
8896                                 continue;
8897                         }
8898                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8899                                 process_extent_item(root, extent_cache, buf,
8900                                                     i);
8901                                 continue;
8902                         }
8903                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8904                                 process_extent_item(root, extent_cache, buf,
8905                                                     i);
8906                                 continue;
8907                         }
8908                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8909                                 total_csum_bytes +=
8910                                         btrfs_item_size_nr(buf, i);
8911                                 continue;
8912                         }
8913                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8914                                 process_chunk_item(chunk_cache, &key, buf, i);
8915                                 continue;
8916                         }
8917                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8918                                 process_device_item(dev_cache, &key, buf, i);
8919                                 continue;
8920                         }
8921                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8922                                 process_block_group_item(block_group_cache,
8923                                         &key, buf, i);
8924                                 continue;
8925                         }
8926                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8927                                 process_device_extent_item(dev_extent_cache,
8928                                         &key, buf, i);
8929                                 continue;
8930
8931                         }
8932                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8933 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8934                                 process_extent_ref_v0(extent_cache, buf, i);
8935 #else
8936                                 BUG();
8937 #endif
8938                                 continue;
8939                         }
8940
8941                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8942                                 ret = add_tree_backref(extent_cache,
8943                                                 key.objectid, 0, key.offset, 0);
8944                                 if (ret < 0)
8945                                         error(
8946                                 "add_tree_backref failed (leaf tree block): %s",
8947                                               strerror(-ret));
8948                                 continue;
8949                         }
8950                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8951                                 ret = add_tree_backref(extent_cache,
8952                                                 key.objectid, key.offset, 0, 0);
8953                                 if (ret < 0)
8954                                         error(
8955                                 "add_tree_backref failed (leaf shared block): %s",
8956                                               strerror(-ret));
8957                                 continue;
8958                         }
8959                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8960                                 struct btrfs_extent_data_ref *ref;
8961                                 ref = btrfs_item_ptr(buf, i,
8962                                                 struct btrfs_extent_data_ref);
8963                                 add_data_backref(extent_cache,
8964                                         key.objectid, 0,
8965                                         btrfs_extent_data_ref_root(buf, ref),
8966                                         btrfs_extent_data_ref_objectid(buf,
8967                                                                        ref),
8968                                         btrfs_extent_data_ref_offset(buf, ref),
8969                                         btrfs_extent_data_ref_count(buf, ref),
8970                                         0, root->fs_info->sectorsize);
8971                                 continue;
8972                         }
8973                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8974                                 struct btrfs_shared_data_ref *ref;
8975                                 ref = btrfs_item_ptr(buf, i,
8976                                                 struct btrfs_shared_data_ref);
8977                                 add_data_backref(extent_cache,
8978                                         key.objectid, key.offset, 0, 0, 0,
8979                                         btrfs_shared_data_ref_count(buf, ref),
8980                                         0, root->fs_info->sectorsize);
8981                                 continue;
8982                         }
8983                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8984                                 struct bad_item *bad;
8985
8986                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8987                                         continue;
8988                                 if (!owner)
8989                                         continue;
8990                                 bad = malloc(sizeof(struct bad_item));
8991                                 if (!bad)
8992                                         continue;
8993                                 INIT_LIST_HEAD(&bad->list);
8994                                 memcpy(&bad->key, &key,
8995                                        sizeof(struct btrfs_key));
8996                                 bad->root_id = owner;
8997                                 list_add_tail(&bad->list, &delete_items);
8998                                 continue;
8999                         }
9000                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9001                                 continue;
9002                         fi = btrfs_item_ptr(buf, i,
9003                                             struct btrfs_file_extent_item);
9004                         if (btrfs_file_extent_type(buf, fi) ==
9005                             BTRFS_FILE_EXTENT_INLINE)
9006                                 continue;
9007                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9008                                 continue;
9009
9010                         data_bytes_allocated +=
9011                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9012                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9013                                 abort();
9014                         }
9015                         data_bytes_referenced +=
9016                                 btrfs_file_extent_num_bytes(buf, fi);
9017                         add_data_backref(extent_cache,
9018                                 btrfs_file_extent_disk_bytenr(buf, fi),
9019                                 parent, owner, key.objectid, key.offset -
9020                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9021                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9022                 }
9023         } else {
9024                 int level;
9025                 struct btrfs_key first_key;
9026
9027                 first_key.objectid = 0;
9028
9029                 if (nritems > 0)
9030                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9031                 level = btrfs_header_level(buf);
9032                 for (i = 0; i < nritems; i++) {
9033                         struct extent_record tmpl;
9034
9035                         ptr = btrfs_node_blockptr(buf, i);
9036                         size = root->fs_info->nodesize;
9037                         btrfs_node_key_to_cpu(buf, &key, i);
9038                         if (ri != NULL) {
9039                                 if ((level == ri->drop_level)
9040                                     && is_dropped_key(&key, &ri->drop_key)) {
9041                                         continue;
9042                                 }
9043                         }
9044
9045                         memset(&tmpl, 0, sizeof(tmpl));
9046                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9047                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9048                         tmpl.start = ptr;
9049                         tmpl.nr = size;
9050                         tmpl.refs = 1;
9051                         tmpl.metadata = 1;
9052                         tmpl.max_size = size;
9053                         ret = add_extent_rec(extent_cache, &tmpl);
9054                         if (ret < 0)
9055                                 goto out;
9056
9057                         ret = add_tree_backref(extent_cache, ptr, parent,
9058                                         owner, 1);
9059                         if (ret < 0) {
9060                                 error(
9061                                 "add_tree_backref failed (non-leaf block): %s",
9062                                       strerror(-ret));
9063                                 continue;
9064                         }
9065
9066                         if (level > 1) {
9067                                 add_pending(nodes, seen, ptr, size);
9068                         } else {
9069                                 add_pending(pending, seen, ptr, size);
9070                         }
9071                 }
9072                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
9073                                       nritems) * sizeof(struct btrfs_key_ptr);
9074         }
9075         total_btree_bytes += buf->len;
9076         if (fs_root_objectid(btrfs_header_owner(buf)))
9077                 total_fs_tree_bytes += buf->len;
9078         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9079                 total_extent_tree_bytes += buf->len;
9080 out:
9081         free_extent_buffer(buf);
9082         return ret;
9083 }
9084
9085 static int add_root_to_pending(struct extent_buffer *buf,
9086                                struct cache_tree *extent_cache,
9087                                struct cache_tree *pending,
9088                                struct cache_tree *seen,
9089                                struct cache_tree *nodes,
9090                                u64 objectid)
9091 {
9092         struct extent_record tmpl;
9093         int ret;
9094
9095         if (btrfs_header_level(buf) > 0)
9096                 add_pending(nodes, seen, buf->start, buf->len);
9097         else
9098                 add_pending(pending, seen, buf->start, buf->len);
9099
9100         memset(&tmpl, 0, sizeof(tmpl));
9101         tmpl.start = buf->start;
9102         tmpl.nr = buf->len;
9103         tmpl.is_root = 1;
9104         tmpl.refs = 1;
9105         tmpl.metadata = 1;
9106         tmpl.max_size = buf->len;
9107         add_extent_rec(extent_cache, &tmpl);
9108
9109         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9110             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9111                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9112                                 0, 1);
9113         else
9114                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9115                                 1);
9116         return ret;
9117 }
9118
9119 /* as we fix the tree, we might be deleting blocks that
9120  * we're tracking for repair.  This hook makes sure we
9121  * remove any backrefs for blocks as we are fixing them.
9122  */
9123 static int free_extent_hook(struct btrfs_trans_handle *trans,
9124                             struct btrfs_root *root,
9125                             u64 bytenr, u64 num_bytes, u64 parent,
9126                             u64 root_objectid, u64 owner, u64 offset,
9127                             int refs_to_drop)
9128 {
9129         struct extent_record *rec;
9130         struct cache_extent *cache;
9131         int is_data;
9132         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9133
9134         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9135         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9136         if (!cache)
9137                 return 0;
9138
9139         rec = container_of(cache, struct extent_record, cache);
9140         if (is_data) {
9141                 struct data_backref *back;
9142                 back = find_data_backref(rec, parent, root_objectid, owner,
9143                                          offset, 1, bytenr, num_bytes);
9144                 if (!back)
9145                         goto out;
9146                 if (back->node.found_ref) {
9147                         back->found_ref -= refs_to_drop;
9148                         if (rec->refs)
9149                                 rec->refs -= refs_to_drop;
9150                 }
9151                 if (back->node.found_extent_tree) {
9152                         back->num_refs -= refs_to_drop;
9153                         if (rec->extent_item_refs)
9154                                 rec->extent_item_refs -= refs_to_drop;
9155                 }
9156                 if (back->found_ref == 0)
9157                         back->node.found_ref = 0;
9158                 if (back->num_refs == 0)
9159                         back->node.found_extent_tree = 0;
9160
9161                 if (!back->node.found_extent_tree && back->node.found_ref) {
9162                         rb_erase(&back->node.node, &rec->backref_tree);
9163                         free(back);
9164                 }
9165         } else {
9166                 struct tree_backref *back;
9167                 back = find_tree_backref(rec, parent, root_objectid);
9168                 if (!back)
9169                         goto out;
9170                 if (back->node.found_ref) {
9171                         if (rec->refs)
9172                                 rec->refs--;
9173                         back->node.found_ref = 0;
9174                 }
9175                 if (back->node.found_extent_tree) {
9176                         if (rec->extent_item_refs)
9177                                 rec->extent_item_refs--;
9178                         back->node.found_extent_tree = 0;
9179                 }
9180                 if (!back->node.found_extent_tree && back->node.found_ref) {
9181                         rb_erase(&back->node.node, &rec->backref_tree);
9182                         free(back);
9183                 }
9184         }
9185         maybe_free_extent_rec(extent_cache, rec);
9186 out:
9187         return 0;
9188 }
9189
9190 static int delete_extent_records(struct btrfs_trans_handle *trans,
9191                                  struct btrfs_root *root,
9192                                  struct btrfs_path *path,
9193                                  u64 bytenr)
9194 {
9195         struct btrfs_key key;
9196         struct btrfs_key found_key;
9197         struct extent_buffer *leaf;
9198         int ret;
9199         int slot;
9200
9201
9202         key.objectid = bytenr;
9203         key.type = (u8)-1;
9204         key.offset = (u64)-1;
9205
9206         while(1) {
9207                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9208                                         &key, path, 0, 1);
9209                 if (ret < 0)
9210                         break;
9211
9212                 if (ret > 0) {
9213                         ret = 0;
9214                         if (path->slots[0] == 0)
9215                                 break;
9216                         path->slots[0]--;
9217                 }
9218                 ret = 0;
9219
9220                 leaf = path->nodes[0];
9221                 slot = path->slots[0];
9222
9223                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9224                 if (found_key.objectid != bytenr)
9225                         break;
9226
9227                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9228                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9229                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9230                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9231                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9232                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9233                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9234                         btrfs_release_path(path);
9235                         if (found_key.type == 0) {
9236                                 if (found_key.offset == 0)
9237                                         break;
9238                                 key.offset = found_key.offset - 1;
9239                                 key.type = found_key.type;
9240                         }
9241                         key.type = found_key.type - 1;
9242                         key.offset = (u64)-1;
9243                         continue;
9244                 }
9245
9246                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9247                         found_key.objectid, found_key.type, found_key.offset);
9248
9249                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9250                 if (ret)
9251                         break;
9252                 btrfs_release_path(path);
9253
9254                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9255                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9256                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9257                                 found_key.offset : root->fs_info->nodesize;
9258
9259                         ret = btrfs_update_block_group(root, bytenr,
9260                                                        bytes, 0, 0);
9261                         if (ret)
9262                                 break;
9263                 }
9264         }
9265
9266         btrfs_release_path(path);
9267         return ret;
9268 }
9269
9270 /*
9271  * for a single backref, this will allocate a new extent
9272  * and add the backref to it.
9273  */
9274 static int record_extent(struct btrfs_trans_handle *trans,
9275                          struct btrfs_fs_info *info,
9276                          struct btrfs_path *path,
9277                          struct extent_record *rec,
9278                          struct extent_backref *back,
9279                          int allocated, u64 flags)
9280 {
9281         int ret = 0;
9282         struct btrfs_root *extent_root = info->extent_root;
9283         struct extent_buffer *leaf;
9284         struct btrfs_key ins_key;
9285         struct btrfs_extent_item *ei;
9286         struct data_backref *dback;
9287         struct btrfs_tree_block_info *bi;
9288
9289         if (!back->is_data)
9290                 rec->max_size = max_t(u64, rec->max_size,
9291                                     info->nodesize);
9292
9293         if (!allocated) {
9294                 u32 item_size = sizeof(*ei);
9295
9296                 if (!back->is_data)
9297                         item_size += sizeof(*bi);
9298
9299                 ins_key.objectid = rec->start;
9300                 ins_key.offset = rec->max_size;
9301                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9302
9303                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9304                                         &ins_key, item_size);
9305                 if (ret)
9306                         goto fail;
9307
9308                 leaf = path->nodes[0];
9309                 ei = btrfs_item_ptr(leaf, path->slots[0],
9310                                     struct btrfs_extent_item);
9311
9312                 btrfs_set_extent_refs(leaf, ei, 0);
9313                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9314
9315                 if (back->is_data) {
9316                         btrfs_set_extent_flags(leaf, ei,
9317                                                BTRFS_EXTENT_FLAG_DATA);
9318                 } else {
9319                         struct btrfs_disk_key copy_key;;
9320
9321                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9322                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9323                                              sizeof(*bi));
9324
9325                         btrfs_set_disk_key_objectid(&copy_key,
9326                                                     rec->info_objectid);
9327                         btrfs_set_disk_key_type(&copy_key, 0);
9328                         btrfs_set_disk_key_offset(&copy_key, 0);
9329
9330                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9331                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9332
9333                         btrfs_set_extent_flags(leaf, ei,
9334                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9335                 }
9336
9337                 btrfs_mark_buffer_dirty(leaf);
9338                 ret = btrfs_update_block_group(extent_root, rec->start,
9339                                                rec->max_size, 1, 0);
9340                 if (ret)
9341                         goto fail;
9342                 btrfs_release_path(path);
9343         }
9344
9345         if (back->is_data) {
9346                 u64 parent;
9347                 int i;
9348
9349                 dback = to_data_backref(back);
9350                 if (back->full_backref)
9351                         parent = dback->parent;
9352                 else
9353                         parent = 0;
9354
9355                 for (i = 0; i < dback->found_ref; i++) {
9356                         /* if parent != 0, we're doing a full backref
9357                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9358                          * just makes the backref allocator create a data
9359                          * backref
9360                          */
9361                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9362                                                    rec->start, rec->max_size,
9363                                                    parent,
9364                                                    dback->root,
9365                                                    parent ?
9366                                                    BTRFS_FIRST_FREE_OBJECTID :
9367                                                    dback->owner,
9368                                                    dback->offset);
9369                         if (ret)
9370                                 break;
9371                 }
9372                 fprintf(stderr, "adding new data backref"
9373                                 " on %llu %s %llu owner %llu"
9374                                 " offset %llu found %d\n",
9375                                 (unsigned long long)rec->start,
9376                                 back->full_backref ?
9377                                 "parent" : "root",
9378                                 back->full_backref ?
9379                                 (unsigned long long)parent :
9380                                 (unsigned long long)dback->root,
9381                                 (unsigned long long)dback->owner,
9382                                 (unsigned long long)dback->offset,
9383                                 dback->found_ref);
9384         } else {
9385                 u64 parent;
9386                 struct tree_backref *tback;
9387
9388                 tback = to_tree_backref(back);
9389                 if (back->full_backref)
9390                         parent = tback->parent;
9391                 else
9392                         parent = 0;
9393
9394                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9395                                            rec->start, rec->max_size,
9396                                            parent, tback->root, 0, 0);
9397                 fprintf(stderr, "adding new tree backref on "
9398                         "start %llu len %llu parent %llu root %llu\n",
9399                         rec->start, rec->max_size, parent, tback->root);
9400         }
9401 fail:
9402         btrfs_release_path(path);
9403         return ret;
9404 }
9405
9406 static struct extent_entry *find_entry(struct list_head *entries,
9407                                        u64 bytenr, u64 bytes)
9408 {
9409         struct extent_entry *entry = NULL;
9410
9411         list_for_each_entry(entry, entries, list) {
9412                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9413                         return entry;
9414         }
9415
9416         return NULL;
9417 }
9418
9419 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9420 {
9421         struct extent_entry *entry, *best = NULL, *prev = NULL;
9422
9423         list_for_each_entry(entry, entries, list) {
9424                 /*
9425                  * If there are as many broken entries as entries then we know
9426                  * not to trust this particular entry.
9427                  */
9428                 if (entry->broken == entry->count)
9429                         continue;
9430
9431                 /*
9432                  * Special case, when there are only two entries and 'best' is
9433                  * the first one
9434                  */
9435                 if (!prev) {
9436                         best = entry;
9437                         prev = entry;
9438                         continue;
9439                 }
9440
9441                 /*
9442                  * If our current entry == best then we can't be sure our best
9443                  * is really the best, so we need to keep searching.
9444                  */
9445                 if (best && best->count == entry->count) {
9446                         prev = entry;
9447                         best = NULL;
9448                         continue;
9449                 }
9450
9451                 /* Prev == entry, not good enough, have to keep searching */
9452                 if (!prev->broken && prev->count == entry->count)
9453                         continue;
9454
9455                 if (!best)
9456                         best = (prev->count > entry->count) ? prev : entry;
9457                 else if (best->count < entry->count)
9458                         best = entry;
9459                 prev = entry;
9460         }
9461
9462         return best;
9463 }
9464
9465 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9466                       struct data_backref *dback, struct extent_entry *entry)
9467 {
9468         struct btrfs_trans_handle *trans;
9469         struct btrfs_root *root;
9470         struct btrfs_file_extent_item *fi;
9471         struct extent_buffer *leaf;
9472         struct btrfs_key key;
9473         u64 bytenr, bytes;
9474         int ret, err;
9475
9476         key.objectid = dback->root;
9477         key.type = BTRFS_ROOT_ITEM_KEY;
9478         key.offset = (u64)-1;
9479         root = btrfs_read_fs_root(info, &key);
9480         if (IS_ERR(root)) {
9481                 fprintf(stderr, "Couldn't find root for our ref\n");
9482                 return -EINVAL;
9483         }
9484
9485         /*
9486          * The backref points to the original offset of the extent if it was
9487          * split, so we need to search down to the offset we have and then walk
9488          * forward until we find the backref we're looking for.
9489          */
9490         key.objectid = dback->owner;
9491         key.type = BTRFS_EXTENT_DATA_KEY;
9492         key.offset = dback->offset;
9493         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9494         if (ret < 0) {
9495                 fprintf(stderr, "Error looking up ref %d\n", ret);
9496                 return ret;
9497         }
9498
9499         while (1) {
9500                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9501                         ret = btrfs_next_leaf(root, path);
9502                         if (ret) {
9503                                 fprintf(stderr, "Couldn't find our ref, next\n");
9504                                 return -EINVAL;
9505                         }
9506                 }
9507                 leaf = path->nodes[0];
9508                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9509                 if (key.objectid != dback->owner ||
9510                     key.type != BTRFS_EXTENT_DATA_KEY) {
9511                         fprintf(stderr, "Couldn't find our ref, search\n");
9512                         return -EINVAL;
9513                 }
9514                 fi = btrfs_item_ptr(leaf, path->slots[0],
9515                                     struct btrfs_file_extent_item);
9516                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9517                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9518
9519                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9520                         break;
9521                 path->slots[0]++;
9522         }
9523
9524         btrfs_release_path(path);
9525
9526         trans = btrfs_start_transaction(root, 1);
9527         if (IS_ERR(trans))
9528                 return PTR_ERR(trans);
9529
9530         /*
9531          * Ok we have the key of the file extent we want to fix, now we can cow
9532          * down to the thing and fix it.
9533          */
9534         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9535         if (ret < 0) {
9536                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9537                         key.objectid, key.type, key.offset, ret);
9538                 goto out;
9539         }
9540         if (ret > 0) {
9541                 fprintf(stderr, "Well that's odd, we just found this key "
9542                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9543                         key.offset);
9544                 ret = -EINVAL;
9545                 goto out;
9546         }
9547         leaf = path->nodes[0];
9548         fi = btrfs_item_ptr(leaf, path->slots[0],
9549                             struct btrfs_file_extent_item);
9550
9551         if (btrfs_file_extent_compression(leaf, fi) &&
9552             dback->disk_bytenr != entry->bytenr) {
9553                 fprintf(stderr, "Ref doesn't match the record start and is "
9554                         "compressed, please take a btrfs-image of this file "
9555                         "system and send it to a btrfs developer so they can "
9556                         "complete this functionality for bytenr %Lu\n",
9557                         dback->disk_bytenr);
9558                 ret = -EINVAL;
9559                 goto out;
9560         }
9561
9562         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9563                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9564         } else if (dback->disk_bytenr > entry->bytenr) {
9565                 u64 off_diff, offset;
9566
9567                 off_diff = dback->disk_bytenr - entry->bytenr;
9568                 offset = btrfs_file_extent_offset(leaf, fi);
9569                 if (dback->disk_bytenr + offset +
9570                     btrfs_file_extent_num_bytes(leaf, fi) >
9571                     entry->bytenr + entry->bytes) {
9572                         fprintf(stderr, "Ref is past the entry end, please "
9573                                 "take a btrfs-image of this file system and "
9574                                 "send it to a btrfs developer, ref %Lu\n",
9575                                 dback->disk_bytenr);
9576                         ret = -EINVAL;
9577                         goto out;
9578                 }
9579                 offset += off_diff;
9580                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9581                 btrfs_set_file_extent_offset(leaf, fi, offset);
9582         } else if (dback->disk_bytenr < entry->bytenr) {
9583                 u64 offset;
9584
9585                 offset = btrfs_file_extent_offset(leaf, fi);
9586                 if (dback->disk_bytenr + offset < entry->bytenr) {
9587                         fprintf(stderr, "Ref is before the entry start, please"
9588                                 " take a btrfs-image of this file system and "
9589                                 "send it to a btrfs developer, ref %Lu\n",
9590                                 dback->disk_bytenr);
9591                         ret = -EINVAL;
9592                         goto out;
9593                 }
9594
9595                 offset += dback->disk_bytenr;
9596                 offset -= entry->bytenr;
9597                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9598                 btrfs_set_file_extent_offset(leaf, fi, offset);
9599         }
9600
9601         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9602
9603         /*
9604          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9605          * only do this if we aren't using compression, otherwise it's a
9606          * trickier case.
9607          */
9608         if (!btrfs_file_extent_compression(leaf, fi))
9609                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9610         else
9611                 printf("ram bytes may be wrong?\n");
9612         btrfs_mark_buffer_dirty(leaf);
9613 out:
9614         err = btrfs_commit_transaction(trans, root);
9615         btrfs_release_path(path);
9616         return ret ? ret : err;
9617 }
9618
9619 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9620                            struct extent_record *rec)
9621 {
9622         struct extent_backref *back, *tmp;
9623         struct data_backref *dback;
9624         struct extent_entry *entry, *best = NULL;
9625         LIST_HEAD(entries);
9626         int nr_entries = 0;
9627         int broken_entries = 0;
9628         int ret = 0;
9629         short mismatch = 0;
9630
9631         /*
9632          * Metadata is easy and the backrefs should always agree on bytenr and
9633          * size, if not we've got bigger issues.
9634          */
9635         if (rec->metadata)
9636                 return 0;
9637
9638         rbtree_postorder_for_each_entry_safe(back, tmp,
9639                                              &rec->backref_tree, node) {
9640                 if (back->full_backref || !back->is_data)
9641                         continue;
9642
9643                 dback = to_data_backref(back);
9644
9645                 /*
9646                  * We only pay attention to backrefs that we found a real
9647                  * backref for.
9648                  */
9649                 if (dback->found_ref == 0)
9650                         continue;
9651
9652                 /*
9653                  * For now we only catch when the bytes don't match, not the
9654                  * bytenr.  We can easily do this at the same time, but I want
9655                  * to have a fs image to test on before we just add repair
9656                  * functionality willy-nilly so we know we won't screw up the
9657                  * repair.
9658                  */
9659
9660                 entry = find_entry(&entries, dback->disk_bytenr,
9661                                    dback->bytes);
9662                 if (!entry) {
9663                         entry = malloc(sizeof(struct extent_entry));
9664                         if (!entry) {
9665                                 ret = -ENOMEM;
9666                                 goto out;
9667                         }
9668                         memset(entry, 0, sizeof(*entry));
9669                         entry->bytenr = dback->disk_bytenr;
9670                         entry->bytes = dback->bytes;
9671                         list_add_tail(&entry->list, &entries);
9672                         nr_entries++;
9673                 }
9674
9675                 /*
9676                  * If we only have on entry we may think the entries agree when
9677                  * in reality they don't so we have to do some extra checking.
9678                  */
9679                 if (dback->disk_bytenr != rec->start ||
9680                     dback->bytes != rec->nr || back->broken)
9681                         mismatch = 1;
9682
9683                 if (back->broken) {
9684                         entry->broken++;
9685                         broken_entries++;
9686                 }
9687
9688                 entry->count++;
9689         }
9690
9691         /* Yay all the backrefs agree, carry on good sir */
9692         if (nr_entries <= 1 && !mismatch)
9693                 goto out;
9694
9695         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9696                 "%Lu\n", rec->start);
9697
9698         /*
9699          * First we want to see if the backrefs can agree amongst themselves who
9700          * is right, so figure out which one of the entries has the highest
9701          * count.
9702          */
9703         best = find_most_right_entry(&entries);
9704
9705         /*
9706          * Ok so we may have an even split between what the backrefs think, so
9707          * this is where we use the extent ref to see what it thinks.
9708          */
9709         if (!best) {
9710                 entry = find_entry(&entries, rec->start, rec->nr);
9711                 if (!entry && (!broken_entries || !rec->found_rec)) {
9712                         fprintf(stderr, "Backrefs don't agree with each other "
9713                                 "and extent record doesn't agree with anybody,"
9714                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9715                                 rec->start, rec->nr);
9716                         ret = -EINVAL;
9717                         goto out;
9718                 } else if (!entry) {
9719                         /*
9720                          * Ok our backrefs were broken, we'll assume this is the
9721                          * correct value and add an entry for this range.
9722                          */
9723                         entry = malloc(sizeof(struct extent_entry));
9724                         if (!entry) {
9725                                 ret = -ENOMEM;
9726                                 goto out;
9727                         }
9728                         memset(entry, 0, sizeof(*entry));
9729                         entry->bytenr = rec->start;
9730                         entry->bytes = rec->nr;
9731                         list_add_tail(&entry->list, &entries);
9732                         nr_entries++;
9733                 }
9734                 entry->count++;
9735                 best = find_most_right_entry(&entries);
9736                 if (!best) {
9737                         fprintf(stderr, "Backrefs and extent record evenly "
9738                                 "split on who is right, this is going to "
9739                                 "require user input to fix bytenr %Lu bytes "
9740                                 "%Lu\n", rec->start, rec->nr);
9741                         ret = -EINVAL;
9742                         goto out;
9743                 }
9744         }
9745
9746         /*
9747          * I don't think this can happen currently as we'll abort() if we catch
9748          * this case higher up, but in case somebody removes that we still can't
9749          * deal with it properly here yet, so just bail out of that's the case.
9750          */
9751         if (best->bytenr != rec->start) {
9752                 fprintf(stderr, "Extent start and backref starts don't match, "
9753                         "please use btrfs-image on this file system and send "
9754                         "it to a btrfs developer so they can make fsck fix "
9755                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9756                         rec->start, rec->nr);
9757                 ret = -EINVAL;
9758                 goto out;
9759         }
9760
9761         /*
9762          * Ok great we all agreed on an extent record, let's go find the real
9763          * references and fix up the ones that don't match.
9764          */
9765         rbtree_postorder_for_each_entry_safe(back, tmp,
9766                                              &rec->backref_tree, node) {
9767                 if (back->full_backref || !back->is_data)
9768                         continue;
9769
9770                 dback = to_data_backref(back);
9771
9772                 /*
9773                  * Still ignoring backrefs that don't have a real ref attached
9774                  * to them.
9775                  */
9776                 if (dback->found_ref == 0)
9777                         continue;
9778
9779                 if (dback->bytes == best->bytes &&
9780                     dback->disk_bytenr == best->bytenr)
9781                         continue;
9782
9783                 ret = repair_ref(info, path, dback, best);
9784                 if (ret)
9785                         goto out;
9786         }
9787
9788         /*
9789          * Ok we messed with the actual refs, which means we need to drop our
9790          * entire cache and go back and rescan.  I know this is a huge pain and
9791          * adds a lot of extra work, but it's the only way to be safe.  Once all
9792          * the backrefs agree we may not need to do anything to the extent
9793          * record itself.
9794          */
9795         ret = -EAGAIN;
9796 out:
9797         while (!list_empty(&entries)) {
9798                 entry = list_entry(entries.next, struct extent_entry, list);
9799                 list_del_init(&entry->list);
9800                 free(entry);
9801         }
9802         return ret;
9803 }
9804
9805 static int process_duplicates(struct cache_tree *extent_cache,
9806                               struct extent_record *rec)
9807 {
9808         struct extent_record *good, *tmp;
9809         struct cache_extent *cache;
9810         int ret;
9811
9812         /*
9813          * If we found a extent record for this extent then return, or if we
9814          * have more than one duplicate we are likely going to need to delete
9815          * something.
9816          */
9817         if (rec->found_rec || rec->num_duplicates > 1)
9818                 return 0;
9819
9820         /* Shouldn't happen but just in case */
9821         BUG_ON(!rec->num_duplicates);
9822
9823         /*
9824          * So this happens if we end up with a backref that doesn't match the
9825          * actual extent entry.  So either the backref is bad or the extent
9826          * entry is bad.  Either way we want to have the extent_record actually
9827          * reflect what we found in the extent_tree, so we need to take the
9828          * duplicate out and use that as the extent_record since the only way we
9829          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9830          */
9831         remove_cache_extent(extent_cache, &rec->cache);
9832
9833         good = to_extent_record(rec->dups.next);
9834         list_del_init(&good->list);
9835         INIT_LIST_HEAD(&good->backrefs);
9836         INIT_LIST_HEAD(&good->dups);
9837         good->cache.start = good->start;
9838         good->cache.size = good->nr;
9839         good->content_checked = 0;
9840         good->owner_ref_checked = 0;
9841         good->num_duplicates = 0;
9842         good->refs = rec->refs;
9843         list_splice_init(&rec->backrefs, &good->backrefs);
9844         while (1) {
9845                 cache = lookup_cache_extent(extent_cache, good->start,
9846                                             good->nr);
9847                 if (!cache)
9848                         break;
9849                 tmp = container_of(cache, struct extent_record, cache);
9850
9851                 /*
9852                  * If we find another overlapping extent and it's found_rec is
9853                  * set then it's a duplicate and we need to try and delete
9854                  * something.
9855                  */
9856                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9857                         if (list_empty(&good->list))
9858                                 list_add_tail(&good->list,
9859                                               &duplicate_extents);
9860                         good->num_duplicates += tmp->num_duplicates + 1;
9861                         list_splice_init(&tmp->dups, &good->dups);
9862                         list_del_init(&tmp->list);
9863                         list_add_tail(&tmp->list, &good->dups);
9864                         remove_cache_extent(extent_cache, &tmp->cache);
9865                         continue;
9866                 }
9867
9868                 /*
9869                  * Ok we have another non extent item backed extent rec, so lets
9870                  * just add it to this extent and carry on like we did above.
9871                  */
9872                 good->refs += tmp->refs;
9873                 list_splice_init(&tmp->backrefs, &good->backrefs);
9874                 remove_cache_extent(extent_cache, &tmp->cache);
9875                 free(tmp);
9876         }
9877         ret = insert_cache_extent(extent_cache, &good->cache);
9878         BUG_ON(ret);
9879         free(rec);
9880         return good->num_duplicates ? 0 : 1;
9881 }
9882
9883 static int delete_duplicate_records(struct btrfs_root *root,
9884                                     struct extent_record *rec)
9885 {
9886         struct btrfs_trans_handle *trans;
9887         LIST_HEAD(delete_list);
9888         struct btrfs_path path;
9889         struct extent_record *tmp, *good, *n;
9890         int nr_del = 0;
9891         int ret = 0, err;
9892         struct btrfs_key key;
9893
9894         btrfs_init_path(&path);
9895
9896         good = rec;
9897         /* Find the record that covers all of the duplicates. */
9898         list_for_each_entry(tmp, &rec->dups, list) {
9899                 if (good->start < tmp->start)
9900                         continue;
9901                 if (good->nr > tmp->nr)
9902                         continue;
9903
9904                 if (tmp->start + tmp->nr < good->start + good->nr) {
9905                         fprintf(stderr, "Ok we have overlapping extents that "
9906                                 "aren't completely covered by each other, this "
9907                                 "is going to require more careful thought.  "
9908                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9909                                 tmp->start, tmp->nr, good->start, good->nr);
9910                         abort();
9911                 }
9912                 good = tmp;
9913         }
9914
9915         if (good != rec)
9916                 list_add_tail(&rec->list, &delete_list);
9917
9918         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9919                 if (tmp == good)
9920                         continue;
9921                 list_move_tail(&tmp->list, &delete_list);
9922         }
9923
9924         root = root->fs_info->extent_root;
9925         trans = btrfs_start_transaction(root, 1);
9926         if (IS_ERR(trans)) {
9927                 ret = PTR_ERR(trans);
9928                 goto out;
9929         }
9930
9931         list_for_each_entry(tmp, &delete_list, list) {
9932                 if (tmp->found_rec == 0)
9933                         continue;
9934                 key.objectid = tmp->start;
9935                 key.type = BTRFS_EXTENT_ITEM_KEY;
9936                 key.offset = tmp->nr;
9937
9938                 /* Shouldn't happen but just in case */
9939                 if (tmp->metadata) {
9940                         fprintf(stderr, "Well this shouldn't happen, extent "
9941                                 "record overlaps but is metadata? "
9942                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9943                         abort();
9944                 }
9945
9946                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9947                 if (ret) {
9948                         if (ret > 0)
9949                                 ret = -EINVAL;
9950                         break;
9951                 }
9952                 ret = btrfs_del_item(trans, root, &path);
9953                 if (ret)
9954                         break;
9955                 btrfs_release_path(&path);
9956                 nr_del++;
9957         }
9958         err = btrfs_commit_transaction(trans, root);
9959         if (err && !ret)
9960                 ret = err;
9961 out:
9962         while (!list_empty(&delete_list)) {
9963                 tmp = to_extent_record(delete_list.next);
9964                 list_del_init(&tmp->list);
9965                 if (tmp == rec)
9966                         continue;
9967                 free(tmp);
9968         }
9969
9970         while (!list_empty(&rec->dups)) {
9971                 tmp = to_extent_record(rec->dups.next);
9972                 list_del_init(&tmp->list);
9973                 free(tmp);
9974         }
9975
9976         btrfs_release_path(&path);
9977
9978         if (!ret && !nr_del)
9979                 rec->num_duplicates = 0;
9980
9981         return ret ? ret : nr_del;
9982 }
9983
9984 static int find_possible_backrefs(struct btrfs_fs_info *info,
9985                                   struct btrfs_path *path,
9986                                   struct cache_tree *extent_cache,
9987                                   struct extent_record *rec)
9988 {
9989         struct btrfs_root *root;
9990         struct extent_backref *back, *tmp;
9991         struct data_backref *dback;
9992         struct cache_extent *cache;
9993         struct btrfs_file_extent_item *fi;
9994         struct btrfs_key key;
9995         u64 bytenr, bytes;
9996         int ret;
9997
9998         rbtree_postorder_for_each_entry_safe(back, tmp,
9999                                              &rec->backref_tree, node) {
10000                 /* Don't care about full backrefs (poor unloved backrefs) */
10001                 if (back->full_backref || !back->is_data)
10002                         continue;
10003
10004                 dback = to_data_backref(back);
10005
10006                 /* We found this one, we don't need to do a lookup */
10007                 if (dback->found_ref)
10008                         continue;
10009
10010                 key.objectid = dback->root;
10011                 key.type = BTRFS_ROOT_ITEM_KEY;
10012                 key.offset = (u64)-1;
10013
10014                 root = btrfs_read_fs_root(info, &key);
10015
10016                 /* No root, definitely a bad ref, skip */
10017                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10018                         continue;
10019                 /* Other err, exit */
10020                 if (IS_ERR(root))
10021                         return PTR_ERR(root);
10022
10023                 key.objectid = dback->owner;
10024                 key.type = BTRFS_EXTENT_DATA_KEY;
10025                 key.offset = dback->offset;
10026                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10027                 if (ret) {
10028                         btrfs_release_path(path);
10029                         if (ret < 0)
10030                                 return ret;
10031                         /* Didn't find it, we can carry on */
10032                         ret = 0;
10033                         continue;
10034                 }
10035
10036                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10037                                     struct btrfs_file_extent_item);
10038                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10039                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10040                 btrfs_release_path(path);
10041                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10042                 if (cache) {
10043                         struct extent_record *tmp;
10044                         tmp = container_of(cache, struct extent_record, cache);
10045
10046                         /*
10047                          * If we found an extent record for the bytenr for this
10048                          * particular backref then we can't add it to our
10049                          * current extent record.  We only want to add backrefs
10050                          * that don't have a corresponding extent item in the
10051                          * extent tree since they likely belong to this record
10052                          * and we need to fix it if it doesn't match bytenrs.
10053                          */
10054                         if  (tmp->found_rec)
10055                                 continue;
10056                 }
10057
10058                 dback->found_ref += 1;
10059                 dback->disk_bytenr = bytenr;
10060                 dback->bytes = bytes;
10061
10062                 /*
10063                  * Set this so the verify backref code knows not to trust the
10064                  * values in this backref.
10065                  */
10066                 back->broken = 1;
10067         }
10068
10069         return 0;
10070 }
10071
10072 /*
10073  * Record orphan data ref into corresponding root.
10074  *
10075  * Return 0 if the extent item contains data ref and recorded.
10076  * Return 1 if the extent item contains no useful data ref
10077  *   On that case, it may contains only shared_dataref or metadata backref
10078  *   or the file extent exists(this should be handled by the extent bytenr
10079  *   recovery routine)
10080  * Return <0 if something goes wrong.
10081  */
10082 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10083                                       struct extent_record *rec)
10084 {
10085         struct btrfs_key key;
10086         struct btrfs_root *dest_root;
10087         struct extent_backref *back, *tmp;
10088         struct data_backref *dback;
10089         struct orphan_data_extent *orphan;
10090         struct btrfs_path path;
10091         int recorded_data_ref = 0;
10092         int ret = 0;
10093
10094         if (rec->metadata)
10095                 return 1;
10096         btrfs_init_path(&path);
10097         rbtree_postorder_for_each_entry_safe(back, tmp,
10098                                              &rec->backref_tree, node) {
10099                 if (back->full_backref || !back->is_data ||
10100                     !back->found_extent_tree)
10101                         continue;
10102                 dback = to_data_backref(back);
10103                 if (dback->found_ref)
10104                         continue;
10105                 key.objectid = dback->root;
10106                 key.type = BTRFS_ROOT_ITEM_KEY;
10107                 key.offset = (u64)-1;
10108
10109                 dest_root = btrfs_read_fs_root(fs_info, &key);
10110
10111                 /* For non-exist root we just skip it */
10112                 if (IS_ERR(dest_root) || !dest_root)
10113                         continue;
10114
10115                 key.objectid = dback->owner;
10116                 key.type = BTRFS_EXTENT_DATA_KEY;
10117                 key.offset = dback->offset;
10118
10119                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10120                 btrfs_release_path(&path);
10121                 /*
10122                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10123                  * we need to record it for inode/file extent rebuild.
10124                  * For ret > 0, we record it only for file extent rebuild.
10125                  * For ret == 0, the file extent exists but only bytenr
10126                  * mismatch, let the original bytenr fix routine to handle,
10127                  * don't record it.
10128                  */
10129                 if (ret == 0)
10130                         continue;
10131                 ret = 0;
10132                 orphan = malloc(sizeof(*orphan));
10133                 if (!orphan) {
10134                         ret = -ENOMEM;
10135                         goto out;
10136                 }
10137                 INIT_LIST_HEAD(&orphan->list);
10138                 orphan->root = dback->root;
10139                 orphan->objectid = dback->owner;
10140                 orphan->offset = dback->offset;
10141                 orphan->disk_bytenr = rec->cache.start;
10142                 orphan->disk_len = rec->cache.size;
10143                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10144                 recorded_data_ref = 1;
10145         }
10146 out:
10147         btrfs_release_path(&path);
10148         if (!ret)
10149                 return !recorded_data_ref;
10150         else
10151                 return ret;
10152 }
10153
10154 /*
10155  * when an incorrect extent item is found, this will delete
10156  * all of the existing entries for it and recreate them
10157  * based on what the tree scan found.
10158  */
10159 static int fixup_extent_refs(struct btrfs_fs_info *info,
10160                              struct cache_tree *extent_cache,
10161                              struct extent_record *rec)
10162 {
10163         struct btrfs_trans_handle *trans = NULL;
10164         int ret;
10165         struct btrfs_path path;
10166         struct cache_extent *cache;
10167         struct extent_backref *back, *tmp;
10168         int allocated = 0;
10169         u64 flags = 0;
10170
10171         if (rec->flag_block_full_backref)
10172                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10173
10174         btrfs_init_path(&path);
10175         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10176                 /*
10177                  * Sometimes the backrefs themselves are so broken they don't
10178                  * get attached to any meaningful rec, so first go back and
10179                  * check any of our backrefs that we couldn't find and throw
10180                  * them into the list if we find the backref so that
10181                  * verify_backrefs can figure out what to do.
10182                  */
10183                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10184                 if (ret < 0)
10185                         goto out;
10186         }
10187
10188         /* step one, make sure all of the backrefs agree */
10189         ret = verify_backrefs(info, &path, rec);
10190         if (ret < 0)
10191                 goto out;
10192
10193         trans = btrfs_start_transaction(info->extent_root, 1);
10194         if (IS_ERR(trans)) {
10195                 ret = PTR_ERR(trans);
10196                 goto out;
10197         }
10198
10199         /* step two, delete all the existing records */
10200         ret = delete_extent_records(trans, info->extent_root, &path,
10201                                     rec->start);
10202
10203         if (ret < 0)
10204                 goto out;
10205
10206         /* was this block corrupt?  If so, don't add references to it */
10207         cache = lookup_cache_extent(info->corrupt_blocks,
10208                                     rec->start, rec->max_size);
10209         if (cache) {
10210                 ret = 0;
10211                 goto out;
10212         }
10213
10214         /* step three, recreate all the refs we did find */
10215         rbtree_postorder_for_each_entry_safe(back, tmp,
10216                                              &rec->backref_tree, node) {
10217                 /*
10218                  * if we didn't find any references, don't create a
10219                  * new extent record
10220                  */
10221                 if (!back->found_ref)
10222                         continue;
10223
10224                 rec->bad_full_backref = 0;
10225                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10226                 allocated = 1;
10227
10228                 if (ret)
10229                         goto out;
10230         }
10231 out:
10232         if (trans) {
10233                 int err = btrfs_commit_transaction(trans, info->extent_root);
10234                 if (!ret)
10235                         ret = err;
10236         }
10237
10238         if (!ret)
10239                 fprintf(stderr, "Repaired extent references for %llu\n",
10240                                 (unsigned long long)rec->start);
10241
10242         btrfs_release_path(&path);
10243         return ret;
10244 }
10245
10246 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10247                               struct extent_record *rec)
10248 {
10249         struct btrfs_trans_handle *trans;
10250         struct btrfs_root *root = fs_info->extent_root;
10251         struct btrfs_path path;
10252         struct btrfs_extent_item *ei;
10253         struct btrfs_key key;
10254         u64 flags;
10255         int ret = 0;
10256
10257         key.objectid = rec->start;
10258         if (rec->metadata) {
10259                 key.type = BTRFS_METADATA_ITEM_KEY;
10260                 key.offset = rec->info_level;
10261         } else {
10262                 key.type = BTRFS_EXTENT_ITEM_KEY;
10263                 key.offset = rec->max_size;
10264         }
10265
10266         trans = btrfs_start_transaction(root, 0);
10267         if (IS_ERR(trans))
10268                 return PTR_ERR(trans);
10269
10270         btrfs_init_path(&path);
10271         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10272         if (ret < 0) {
10273                 btrfs_release_path(&path);
10274                 btrfs_commit_transaction(trans, root);
10275                 return ret;
10276         } else if (ret) {
10277                 fprintf(stderr, "Didn't find extent for %llu\n",
10278                         (unsigned long long)rec->start);
10279                 btrfs_release_path(&path);
10280                 btrfs_commit_transaction(trans, root);
10281                 return -ENOENT;
10282         }
10283
10284         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10285                             struct btrfs_extent_item);
10286         flags = btrfs_extent_flags(path.nodes[0], ei);
10287         if (rec->flag_block_full_backref) {
10288                 fprintf(stderr, "setting full backref on %llu\n",
10289                         (unsigned long long)key.objectid);
10290                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10291         } else {
10292                 fprintf(stderr, "clearing full backref on %llu\n",
10293                         (unsigned long long)key.objectid);
10294                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10295         }
10296         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10297         btrfs_mark_buffer_dirty(path.nodes[0]);
10298         btrfs_release_path(&path);
10299         ret = btrfs_commit_transaction(trans, root);
10300         if (!ret)
10301                 fprintf(stderr, "Repaired extent flags for %llu\n",
10302                                 (unsigned long long)rec->start);
10303
10304         return ret;
10305 }
10306
10307 /* right now we only prune from the extent allocation tree */
10308 static int prune_one_block(struct btrfs_trans_handle *trans,
10309                            struct btrfs_fs_info *info,
10310                            struct btrfs_corrupt_block *corrupt)
10311 {
10312         int ret;
10313         struct btrfs_path path;
10314         struct extent_buffer *eb;
10315         u64 found;
10316         int slot;
10317         int nritems;
10318         int level = corrupt->level + 1;
10319
10320         btrfs_init_path(&path);
10321 again:
10322         /* we want to stop at the parent to our busted block */
10323         path.lowest_level = level;
10324
10325         ret = btrfs_search_slot(trans, info->extent_root,
10326                                 &corrupt->key, &path, -1, 1);
10327
10328         if (ret < 0)
10329                 goto out;
10330
10331         eb = path.nodes[level];
10332         if (!eb) {
10333                 ret = -ENOENT;
10334                 goto out;
10335         }
10336
10337         /*
10338          * hopefully the search gave us the block we want to prune,
10339          * lets try that first
10340          */
10341         slot = path.slots[level];
10342         found =  btrfs_node_blockptr(eb, slot);
10343         if (found == corrupt->cache.start)
10344                 goto del_ptr;
10345
10346         nritems = btrfs_header_nritems(eb);
10347
10348         /* the search failed, lets scan this node and hope we find it */
10349         for (slot = 0; slot < nritems; slot++) {
10350                 found =  btrfs_node_blockptr(eb, slot);
10351                 if (found == corrupt->cache.start)
10352                         goto del_ptr;
10353         }
10354         /*
10355          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10356          * to this block
10357          */
10358         if (eb == info->extent_root->node) {
10359                 ret = -ENOENT;
10360                 goto out;
10361         } else {
10362                 level++;
10363                 btrfs_release_path(&path);
10364                 goto again;
10365         }
10366
10367 del_ptr:
10368         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10369         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10370
10371 out:
10372         btrfs_release_path(&path);
10373         return ret;
10374 }
10375
10376 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10377 {
10378         struct btrfs_trans_handle *trans = NULL;
10379         struct cache_extent *cache;
10380         struct btrfs_corrupt_block *corrupt;
10381
10382         while (1) {
10383                 cache = search_cache_extent(info->corrupt_blocks, 0);
10384                 if (!cache)
10385                         break;
10386                 if (!trans) {
10387                         trans = btrfs_start_transaction(info->extent_root, 1);
10388                         if (IS_ERR(trans))
10389                                 return PTR_ERR(trans);
10390                 }
10391                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10392                 prune_one_block(trans, info, corrupt);
10393                 remove_cache_extent(info->corrupt_blocks, cache);
10394         }
10395         if (trans)
10396                 return btrfs_commit_transaction(trans, info->extent_root);
10397         return 0;
10398 }
10399
10400 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10401 {
10402         struct btrfs_block_group_cache *cache;
10403         u64 start, end;
10404         int ret;
10405
10406         while (1) {
10407                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10408                                             &start, &end, EXTENT_DIRTY);
10409                 if (ret)
10410                         break;
10411                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10412         }
10413
10414         start = 0;
10415         while (1) {
10416                 cache = btrfs_lookup_first_block_group(fs_info, start);
10417                 if (!cache)
10418                         break;
10419                 if (cache->cached)
10420                         cache->cached = 0;
10421                 start = cache->key.objectid + cache->key.offset;
10422         }
10423 }
10424
10425 static int check_extent_refs(struct btrfs_root *root,
10426                              struct cache_tree *extent_cache)
10427 {
10428         struct extent_record *rec;
10429         struct cache_extent *cache;
10430         int ret = 0;
10431         int had_dups = 0;
10432         int err = 0;
10433
10434         if (repair) {
10435                 /*
10436                  * if we're doing a repair, we have to make sure
10437                  * we don't allocate from the problem extents.
10438                  * In the worst case, this will be all the
10439                  * extents in the FS
10440                  */
10441                 cache = search_cache_extent(extent_cache, 0);
10442                 while(cache) {
10443                         rec = container_of(cache, struct extent_record, cache);
10444                         set_extent_dirty(root->fs_info->excluded_extents,
10445                                          rec->start,
10446                                          rec->start + rec->max_size - 1);
10447                         cache = next_cache_extent(cache);
10448                 }
10449
10450                 /* pin down all the corrupted blocks too */
10451                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10452                 while(cache) {
10453                         set_extent_dirty(root->fs_info->excluded_extents,
10454                                          cache->start,
10455                                          cache->start + cache->size - 1);
10456                         cache = next_cache_extent(cache);
10457                 }
10458                 prune_corrupt_blocks(root->fs_info);
10459                 reset_cached_block_groups(root->fs_info);
10460         }
10461
10462         reset_cached_block_groups(root->fs_info);
10463
10464         /*
10465          * We need to delete any duplicate entries we find first otherwise we
10466          * could mess up the extent tree when we have backrefs that actually
10467          * belong to a different extent item and not the weird duplicate one.
10468          */
10469         while (repair && !list_empty(&duplicate_extents)) {
10470                 rec = to_extent_record(duplicate_extents.next);
10471                 list_del_init(&rec->list);
10472
10473                 /* Sometimes we can find a backref before we find an actual
10474                  * extent, so we need to process it a little bit to see if there
10475                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10476                  * if this is a backref screwup.  If we need to delete stuff
10477                  * process_duplicates() will return 0, otherwise it will return
10478                  * 1 and we
10479                  */
10480                 if (process_duplicates(extent_cache, rec))
10481                         continue;
10482                 ret = delete_duplicate_records(root, rec);
10483                 if (ret < 0)
10484                         return ret;
10485                 /*
10486                  * delete_duplicate_records will return the number of entries
10487                  * deleted, so if it's greater than 0 then we know we actually
10488                  * did something and we need to remove.
10489                  */
10490                 if (ret)
10491                         had_dups = 1;
10492         }
10493
10494         if (had_dups)
10495                 return -EAGAIN;
10496
10497         while(1) {
10498                 int cur_err = 0;
10499                 int fix = 0;
10500
10501                 cache = search_cache_extent(extent_cache, 0);
10502                 if (!cache)
10503                         break;
10504                 rec = container_of(cache, struct extent_record, cache);
10505                 if (rec->num_duplicates) {
10506                         fprintf(stderr, "extent item %llu has multiple extent "
10507                                 "items\n", (unsigned long long)rec->start);
10508                         cur_err = 1;
10509                 }
10510
10511                 if (rec->refs != rec->extent_item_refs) {
10512                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10513                                 (unsigned long long)rec->start,
10514                                 (unsigned long long)rec->nr);
10515                         fprintf(stderr, "extent item %llu, found %llu\n",
10516                                 (unsigned long long)rec->extent_item_refs,
10517                                 (unsigned long long)rec->refs);
10518                         ret = record_orphan_data_extents(root->fs_info, rec);
10519                         if (ret < 0)
10520                                 goto repair_abort;
10521                         fix = ret;
10522                         cur_err = 1;
10523                 }
10524                 if (all_backpointers_checked(rec, 1)) {
10525                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10526                                 (unsigned long long)rec->start,
10527                                 (unsigned long long)rec->nr);
10528                         fix = 1;
10529                         cur_err = 1;
10530                 }
10531                 if (!rec->owner_ref_checked) {
10532                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10533                                 (unsigned long long)rec->start,
10534                                 (unsigned long long)rec->nr);
10535                         fix = 1;
10536                         cur_err = 1;
10537                 }
10538
10539                 if (repair && fix) {
10540                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10541                         if (ret)
10542                                 goto repair_abort;
10543                 }
10544
10545
10546                 if (rec->bad_full_backref) {
10547                         fprintf(stderr, "bad full backref, on [%llu]\n",
10548                                 (unsigned long long)rec->start);
10549                         if (repair) {
10550                                 ret = fixup_extent_flags(root->fs_info, rec);
10551                                 if (ret)
10552                                         goto repair_abort;
10553                                 fix = 1;
10554                         }
10555                         cur_err = 1;
10556                 }
10557                 /*
10558                  * Although it's not a extent ref's problem, we reuse this
10559                  * routine for error reporting.
10560                  * No repair function yet.
10561                  */
10562                 if (rec->crossing_stripes) {
10563                         fprintf(stderr,
10564                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10565                                 rec->start, rec->start + rec->max_size);
10566                         cur_err = 1;
10567                 }
10568
10569                 if (rec->wrong_chunk_type) {
10570                         fprintf(stderr,
10571                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10572                                 rec->start, rec->start + rec->max_size);
10573                         cur_err = 1;
10574                 }
10575
10576                 err = cur_err;
10577                 remove_cache_extent(extent_cache, cache);
10578                 free_all_extent_backrefs(rec);
10579                 if (!init_extent_tree && repair && (!cur_err || fix))
10580                         clear_extent_dirty(root->fs_info->excluded_extents,
10581                                            rec->start,
10582                                            rec->start + rec->max_size - 1);
10583                 free(rec);
10584         }
10585 repair_abort:
10586         if (repair) {
10587                 if (ret && ret != -EAGAIN) {
10588                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10589                         exit(1);
10590                 } else if (!ret) {
10591                         struct btrfs_trans_handle *trans;
10592
10593                         root = root->fs_info->extent_root;
10594                         trans = btrfs_start_transaction(root, 1);
10595                         if (IS_ERR(trans)) {
10596                                 ret = PTR_ERR(trans);
10597                                 goto repair_abort;
10598                         }
10599
10600                         ret = btrfs_fix_block_accounting(trans, root);
10601                         if (ret)
10602                                 goto repair_abort;
10603                         ret = btrfs_commit_transaction(trans, root);
10604                         if (ret)
10605                                 goto repair_abort;
10606                 }
10607                 return ret;
10608         }
10609
10610         if (err)
10611                 err = -EIO;
10612         return err;
10613 }
10614
10615 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10616 {
10617         u64 stripe_size;
10618
10619         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10620                 stripe_size = length;
10621                 stripe_size /= num_stripes;
10622         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10623                 stripe_size = length * 2;
10624                 stripe_size /= num_stripes;
10625         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10626                 stripe_size = length;
10627                 stripe_size /= (num_stripes - 1);
10628         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10629                 stripe_size = length;
10630                 stripe_size /= (num_stripes - 2);
10631         } else {
10632                 stripe_size = length;
10633         }
10634         return stripe_size;
10635 }
10636
10637 /*
10638  * Check the chunk with its block group/dev list ref:
10639  * Return 0 if all refs seems valid.
10640  * Return 1 if part of refs seems valid, need later check for rebuild ref
10641  * like missing block group and needs to search extent tree to rebuild them.
10642  * Return -1 if essential refs are missing and unable to rebuild.
10643  */
10644 static int check_chunk_refs(struct chunk_record *chunk_rec,
10645                             struct block_group_tree *block_group_cache,
10646                             struct device_extent_tree *dev_extent_cache,
10647                             int silent)
10648 {
10649         struct cache_extent *block_group_item;
10650         struct block_group_record *block_group_rec;
10651         struct cache_extent *dev_extent_item;
10652         struct device_extent_record *dev_extent_rec;
10653         u64 devid;
10654         u64 offset;
10655         u64 length;
10656         int metadump_v2 = 0;
10657         int i;
10658         int ret = 0;
10659
10660         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10661                                                chunk_rec->offset,
10662                                                chunk_rec->length);
10663         if (block_group_item) {
10664                 block_group_rec = container_of(block_group_item,
10665                                                struct block_group_record,
10666                                                cache);
10667                 if (chunk_rec->length != block_group_rec->offset ||
10668                     chunk_rec->offset != block_group_rec->objectid ||
10669                     (!metadump_v2 &&
10670                      chunk_rec->type_flags != block_group_rec->flags)) {
10671                         if (!silent)
10672                                 fprintf(stderr,
10673                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10674                                         chunk_rec->objectid,
10675                                         chunk_rec->type,
10676                                         chunk_rec->offset,
10677                                         chunk_rec->length,
10678                                         chunk_rec->offset,
10679                                         chunk_rec->type_flags,
10680                                         block_group_rec->objectid,
10681                                         block_group_rec->type,
10682                                         block_group_rec->offset,
10683                                         block_group_rec->offset,
10684                                         block_group_rec->objectid,
10685                                         block_group_rec->flags);
10686                         ret = -1;
10687                 } else {
10688                         list_del_init(&block_group_rec->list);
10689                         chunk_rec->bg_rec = block_group_rec;
10690                 }
10691         } else {
10692                 if (!silent)
10693                         fprintf(stderr,
10694                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10695                                 chunk_rec->objectid,
10696                                 chunk_rec->type,
10697                                 chunk_rec->offset,
10698                                 chunk_rec->length,
10699                                 chunk_rec->offset,
10700                                 chunk_rec->type_flags);
10701                 ret = 1;
10702         }
10703
10704         if (metadump_v2)
10705                 return ret;
10706
10707         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10708                                     chunk_rec->num_stripes);
10709         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10710                 devid = chunk_rec->stripes[i].devid;
10711                 offset = chunk_rec->stripes[i].offset;
10712                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10713                                                        devid, offset, length);
10714                 if (dev_extent_item) {
10715                         dev_extent_rec = container_of(dev_extent_item,
10716                                                 struct device_extent_record,
10717                                                 cache);
10718                         if (dev_extent_rec->objectid != devid ||
10719                             dev_extent_rec->offset != offset ||
10720                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10721                             dev_extent_rec->length != length) {
10722                                 if (!silent)
10723                                         fprintf(stderr,
10724                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10725                                                 chunk_rec->objectid,
10726                                                 chunk_rec->type,
10727                                                 chunk_rec->offset,
10728                                                 chunk_rec->stripes[i].devid,
10729                                                 chunk_rec->stripes[i].offset,
10730                                                 dev_extent_rec->objectid,
10731                                                 dev_extent_rec->offset,
10732                                                 dev_extent_rec->length);
10733                                 ret = -1;
10734                         } else {
10735                                 list_move(&dev_extent_rec->chunk_list,
10736                                           &chunk_rec->dextents);
10737                         }
10738                 } else {
10739                         if (!silent)
10740                                 fprintf(stderr,
10741                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10742                                         chunk_rec->objectid,
10743                                         chunk_rec->type,
10744                                         chunk_rec->offset,
10745                                         chunk_rec->stripes[i].devid,
10746                                         chunk_rec->stripes[i].offset);
10747                         ret = -1;
10748                 }
10749         }
10750         return ret;
10751 }
10752
10753 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10754 int check_chunks(struct cache_tree *chunk_cache,
10755                  struct block_group_tree *block_group_cache,
10756                  struct device_extent_tree *dev_extent_cache,
10757                  struct list_head *good, struct list_head *bad,
10758                  struct list_head *rebuild, int silent)
10759 {
10760         struct cache_extent *chunk_item;
10761         struct chunk_record *chunk_rec;
10762         struct block_group_record *bg_rec;
10763         struct device_extent_record *dext_rec;
10764         int err;
10765         int ret = 0;
10766
10767         chunk_item = first_cache_extent(chunk_cache);
10768         while (chunk_item) {
10769                 chunk_rec = container_of(chunk_item, struct chunk_record,
10770                                          cache);
10771                 err = check_chunk_refs(chunk_rec, block_group_cache,
10772                                        dev_extent_cache, silent);
10773                 if (err < 0)
10774                         ret = err;
10775                 if (err == 0 && good)
10776                         list_add_tail(&chunk_rec->list, good);
10777                 if (err > 0 && rebuild)
10778                         list_add_tail(&chunk_rec->list, rebuild);
10779                 if (err < 0 && bad)
10780                         list_add_tail(&chunk_rec->list, bad);
10781                 chunk_item = next_cache_extent(chunk_item);
10782         }
10783
10784         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10785                 if (!silent)
10786                         fprintf(stderr,
10787                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10788                                 bg_rec->objectid,
10789                                 bg_rec->offset,
10790                                 bg_rec->flags);
10791                 if (!ret)
10792                         ret = 1;
10793         }
10794
10795         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10796                             chunk_list) {
10797                 if (!silent)
10798                         fprintf(stderr,
10799                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10800                                 dext_rec->objectid,
10801                                 dext_rec->offset,
10802                                 dext_rec->length);
10803                 if (!ret)
10804                         ret = 1;
10805         }
10806         return ret;
10807 }
10808
10809
10810 static int check_device_used(struct device_record *dev_rec,
10811                              struct device_extent_tree *dext_cache)
10812 {
10813         struct cache_extent *cache;
10814         struct device_extent_record *dev_extent_rec;
10815         u64 total_byte = 0;
10816
10817         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10818         while (cache) {
10819                 dev_extent_rec = container_of(cache,
10820                                               struct device_extent_record,
10821                                               cache);
10822                 if (dev_extent_rec->objectid != dev_rec->devid)
10823                         break;
10824
10825                 list_del_init(&dev_extent_rec->device_list);
10826                 total_byte += dev_extent_rec->length;
10827                 cache = next_cache_extent(cache);
10828         }
10829
10830         if (total_byte != dev_rec->byte_used) {
10831                 fprintf(stderr,
10832                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10833                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10834                         dev_rec->type, dev_rec->offset);
10835                 return -1;
10836         } else {
10837                 return 0;
10838         }
10839 }
10840
10841 /*
10842  * Extra (optional) check for dev_item size to report possbile problem on a new
10843  * kernel.
10844  */
10845 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
10846 {
10847         if (!IS_ALIGNED(total_bytes, sectorsize)) {
10848                 warning(
10849 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
10850                         devid, total_bytes, sectorsize);
10851                 warning(
10852 "this is OK for older kernel, but may cause kernel warning for newer kernels");
10853                 warning("this can be fixed by 'btrfs rescue fix-device-size'");
10854         }
10855 }
10856
10857 /*
10858  * Unlike device size alignment check above, some super total_bytes check
10859  * failure can lead to mount failure for newer kernel.
10860  *
10861  * So this function will return the error for a fatal super total_bytes problem.
10862  */
10863 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10864 {
10865         struct btrfs_device *dev;
10866         struct list_head *dev_list = &fs_info->fs_devices->devices;
10867         u64 total_bytes = 0;
10868         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10869
10870         list_for_each_entry(dev, dev_list, dev_list)
10871                 total_bytes += dev->total_bytes;
10872
10873         /* Important check, which can cause unmountable fs */
10874         if (super_bytes < total_bytes) {
10875                 error("super total bytes %llu smaller than real device(s) size %llu",
10876                         super_bytes, total_bytes);
10877                 error("mounting this fs may fail for newer kernels");
10878                 error("this can be fixed by 'btrfs rescue fix-device-size'");
10879                 return false;
10880         }
10881
10882         /*
10883          * Optional check, just to make everything aligned and match with each
10884          * other.
10885          *
10886          * For a btrfs-image restored fs, we don't need to check it anyway.
10887          */
10888         if (btrfs_super_flags(fs_info->super_copy) &
10889             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10890                 return true;
10891         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10892             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10893             super_bytes != total_bytes) {
10894                 warning("minor unaligned/mismatch device size detected");
10895                 warning(
10896                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10897         }
10898         return true;
10899 }
10900
10901 /* check btrfs_dev_item -> btrfs_dev_extent */
10902 static int check_devices(struct rb_root *dev_cache,
10903                          struct device_extent_tree *dev_extent_cache)
10904 {
10905         struct rb_node *dev_node;
10906         struct device_record *dev_rec;
10907         struct device_extent_record *dext_rec;
10908         int err;
10909         int ret = 0;
10910
10911         dev_node = rb_first(dev_cache);
10912         while (dev_node) {
10913                 dev_rec = container_of(dev_node, struct device_record, node);
10914                 err = check_device_used(dev_rec, dev_extent_cache);
10915                 if (err)
10916                         ret = err;
10917
10918                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10919                                          global_info->sectorsize);
10920                 dev_node = rb_next(dev_node);
10921         }
10922         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10923                             device_list) {
10924                 fprintf(stderr,
10925                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10926                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10927                 if (!ret)
10928                         ret = 1;
10929         }
10930         return ret;
10931 }
10932
10933 static int add_root_item_to_list(struct list_head *head,
10934                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10935                                   u8 level, u8 drop_level,
10936                                   struct btrfs_key *drop_key)
10937 {
10938
10939         struct root_item_record *ri_rec;
10940         ri_rec = malloc(sizeof(*ri_rec));
10941         if (!ri_rec)
10942                 return -ENOMEM;
10943         ri_rec->bytenr = bytenr;
10944         ri_rec->objectid = objectid;
10945         ri_rec->level = level;
10946         ri_rec->drop_level = drop_level;
10947         ri_rec->last_snapshot = last_snapshot;
10948         if (drop_key)
10949                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10950         list_add_tail(&ri_rec->list, head);
10951
10952         return 0;
10953 }
10954
10955 static void free_root_item_list(struct list_head *list)
10956 {
10957         struct root_item_record *ri_rec;
10958
10959         while (!list_empty(list)) {
10960                 ri_rec = list_first_entry(list, struct root_item_record,
10961                                           list);
10962                 list_del_init(&ri_rec->list);
10963                 free(ri_rec);
10964         }
10965 }
10966
10967 static int deal_root_from_list(struct list_head *list,
10968                                struct btrfs_root *root,
10969                                struct block_info *bits,
10970                                int bits_nr,
10971                                struct cache_tree *pending,
10972                                struct cache_tree *seen,
10973                                struct cache_tree *reada,
10974                                struct cache_tree *nodes,
10975                                struct cache_tree *extent_cache,
10976                                struct cache_tree *chunk_cache,
10977                                struct rb_root *dev_cache,
10978                                struct block_group_tree *block_group_cache,
10979                                struct device_extent_tree *dev_extent_cache)
10980 {
10981         int ret = 0;
10982         u64 last;
10983
10984         while (!list_empty(list)) {
10985                 struct root_item_record *rec;
10986                 struct extent_buffer *buf;
10987                 rec = list_entry(list->next,
10988                                  struct root_item_record, list);
10989                 last = 0;
10990                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10991                 if (!extent_buffer_uptodate(buf)) {
10992                         free_extent_buffer(buf);
10993                         ret = -EIO;
10994                         break;
10995                 }
10996                 ret = add_root_to_pending(buf, extent_cache, pending,
10997                                     seen, nodes, rec->objectid);
10998                 if (ret < 0)
10999                         break;
11000                 /*
11001                  * To rebuild extent tree, we need deal with snapshot
11002                  * one by one, otherwise we deal with node firstly which
11003                  * can maximize readahead.
11004                  */
11005                 while (1) {
11006                         ret = run_next_block(root, bits, bits_nr, &last,
11007                                              pending, seen, reada, nodes,
11008                                              extent_cache, chunk_cache,
11009                                              dev_cache, block_group_cache,
11010                                              dev_extent_cache, rec);
11011                         if (ret != 0)
11012                                 break;
11013                 }
11014                 free_extent_buffer(buf);
11015                 list_del(&rec->list);
11016                 free(rec);
11017                 if (ret < 0)
11018                         break;
11019         }
11020         while (ret >= 0) {
11021                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11022                                      reada, nodes, extent_cache, chunk_cache,
11023                                      dev_cache, block_group_cache,
11024                                      dev_extent_cache, NULL);
11025                 if (ret != 0) {
11026                         if (ret > 0)
11027                                 ret = 0;
11028                         break;
11029                 }
11030         }
11031         return ret;
11032 }
11033
11034 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11035 {
11036         struct rb_root dev_cache;
11037         struct cache_tree chunk_cache;
11038         struct block_group_tree block_group_cache;
11039         struct device_extent_tree dev_extent_cache;
11040         struct cache_tree extent_cache;
11041         struct cache_tree seen;
11042         struct cache_tree pending;
11043         struct cache_tree reada;
11044         struct cache_tree nodes;
11045         struct extent_io_tree excluded_extents;
11046         struct cache_tree corrupt_blocks;
11047         struct btrfs_path path;
11048         struct btrfs_key key;
11049         struct btrfs_key found_key;
11050         int ret, err = 0;
11051         struct block_info *bits;
11052         int bits_nr;
11053         struct extent_buffer *leaf;
11054         int slot;
11055         struct btrfs_root_item ri;
11056         struct list_head dropping_trees;
11057         struct list_head normal_trees;
11058         struct btrfs_root *root1;
11059         struct btrfs_root *root;
11060         u64 objectid;
11061         u8 level;
11062
11063         root = fs_info->fs_root;
11064         dev_cache = RB_ROOT;
11065         cache_tree_init(&chunk_cache);
11066         block_group_tree_init(&block_group_cache);
11067         device_extent_tree_init(&dev_extent_cache);
11068
11069         cache_tree_init(&extent_cache);
11070         cache_tree_init(&seen);
11071         cache_tree_init(&pending);
11072         cache_tree_init(&nodes);
11073         cache_tree_init(&reada);
11074         cache_tree_init(&corrupt_blocks);
11075         extent_io_tree_init(&excluded_extents);
11076         INIT_LIST_HEAD(&dropping_trees);
11077         INIT_LIST_HEAD(&normal_trees);
11078
11079         if (repair) {
11080                 fs_info->excluded_extents = &excluded_extents;
11081                 fs_info->fsck_extent_cache = &extent_cache;
11082                 fs_info->free_extent_hook = free_extent_hook;
11083                 fs_info->corrupt_blocks = &corrupt_blocks;
11084         }
11085
11086         bits_nr = 1024;
11087         bits = malloc(bits_nr * sizeof(struct block_info));
11088         if (!bits) {
11089                 perror("malloc");
11090                 exit(1);
11091         }
11092
11093         if (ctx.progress_enabled) {
11094                 ctx.tp = TASK_EXTENTS;
11095                 task_start(ctx.info);
11096         }
11097
11098 again:
11099         root1 = fs_info->tree_root;
11100         level = btrfs_header_level(root1->node);
11101         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11102                                     root1->node->start, 0, level, 0, NULL);
11103         if (ret < 0)
11104                 goto out;
11105         root1 = fs_info->chunk_root;
11106         level = btrfs_header_level(root1->node);
11107         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11108                                     root1->node->start, 0, level, 0, NULL);
11109         if (ret < 0)
11110                 goto out;
11111         btrfs_init_path(&path);
11112         key.offset = 0;
11113         key.objectid = 0;
11114         key.type = BTRFS_ROOT_ITEM_KEY;
11115         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11116         if (ret < 0)
11117                 goto out;
11118         while(1) {
11119                 leaf = path.nodes[0];
11120                 slot = path.slots[0];
11121                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11122                         ret = btrfs_next_leaf(root, &path);
11123                         if (ret != 0)
11124                                 break;
11125                         leaf = path.nodes[0];
11126                         slot = path.slots[0];
11127                 }
11128                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11129                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11130                         unsigned long offset;
11131                         u64 last_snapshot;
11132
11133                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11134                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11135                         last_snapshot = btrfs_root_last_snapshot(&ri);
11136                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11137                                 level = btrfs_root_level(&ri);
11138                                 ret = add_root_item_to_list(&normal_trees,
11139                                                 found_key.objectid,
11140                                                 btrfs_root_bytenr(&ri),
11141                                                 last_snapshot, level,
11142                                                 0, NULL);
11143                                 if (ret < 0)
11144                                         goto out;
11145                         } else {
11146                                 level = btrfs_root_level(&ri);
11147                                 objectid = found_key.objectid;
11148                                 btrfs_disk_key_to_cpu(&found_key,
11149                                                       &ri.drop_progress);
11150                                 ret = add_root_item_to_list(&dropping_trees,
11151                                                 objectid,
11152                                                 btrfs_root_bytenr(&ri),
11153                                                 last_snapshot, level,
11154                                                 ri.drop_level, &found_key);
11155                                 if (ret < 0)
11156                                         goto out;
11157                         }
11158                 }
11159                 path.slots[0]++;
11160         }
11161         btrfs_release_path(&path);
11162
11163         /*
11164          * check_block can return -EAGAIN if it fixes something, please keep
11165          * this in mind when dealing with return values from these functions, if
11166          * we get -EAGAIN we want to fall through and restart the loop.
11167          */
11168         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11169                                   &seen, &reada, &nodes, &extent_cache,
11170                                   &chunk_cache, &dev_cache, &block_group_cache,
11171                                   &dev_extent_cache);
11172         if (ret < 0) {
11173                 if (ret == -EAGAIN)
11174                         goto loop;
11175                 goto out;
11176         }
11177         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11178                                   &pending, &seen, &reada, &nodes,
11179                                   &extent_cache, &chunk_cache, &dev_cache,
11180                                   &block_group_cache, &dev_extent_cache);
11181         if (ret < 0) {
11182                 if (ret == -EAGAIN)
11183                         goto loop;
11184                 goto out;
11185         }
11186
11187         ret = check_chunks(&chunk_cache, &block_group_cache,
11188                            &dev_extent_cache, NULL, NULL, NULL, 0);
11189         if (ret) {
11190                 if (ret == -EAGAIN)
11191                         goto loop;
11192                 err = ret;
11193         }
11194
11195         ret = check_extent_refs(root, &extent_cache);
11196         if (ret < 0) {
11197                 if (ret == -EAGAIN)
11198                         goto loop;
11199                 goto out;
11200         }
11201
11202         ret = check_devices(&dev_cache, &dev_extent_cache);
11203         if (ret && err)
11204                 ret = err;
11205
11206 out:
11207         task_stop(ctx.info);
11208         if (repair) {
11209                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11210                 extent_io_tree_cleanup(&excluded_extents);
11211                 fs_info->fsck_extent_cache = NULL;
11212                 fs_info->free_extent_hook = NULL;
11213                 fs_info->corrupt_blocks = NULL;
11214                 fs_info->excluded_extents = NULL;
11215         }
11216         free(bits);
11217         free_chunk_cache_tree(&chunk_cache);
11218         free_device_cache_tree(&dev_cache);
11219         free_block_group_tree(&block_group_cache);
11220         free_device_extent_tree(&dev_extent_cache);
11221         free_extent_cache_tree(&seen);
11222         free_extent_cache_tree(&pending);
11223         free_extent_cache_tree(&reada);
11224         free_extent_cache_tree(&nodes);
11225         free_root_item_list(&normal_trees);
11226         free_root_item_list(&dropping_trees);
11227         return ret;
11228 loop:
11229         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11230         free_extent_cache_tree(&seen);
11231         free_extent_cache_tree(&pending);
11232         free_extent_cache_tree(&reada);
11233         free_extent_cache_tree(&nodes);
11234         free_chunk_cache_tree(&chunk_cache);
11235         free_block_group_tree(&block_group_cache);
11236         free_device_cache_tree(&dev_cache);
11237         free_device_extent_tree(&dev_extent_cache);
11238         free_extent_record_cache(&extent_cache);
11239         free_root_item_list(&normal_trees);
11240         free_root_item_list(&dropping_trees);
11241         extent_io_tree_cleanup(&excluded_extents);
11242         goto again;
11243 }
11244
11245 static int check_extent_inline_ref(struct extent_buffer *eb,
11246                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11247 {
11248         int ret;
11249         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11250
11251         switch (type) {
11252         case BTRFS_TREE_BLOCK_REF_KEY:
11253         case BTRFS_EXTENT_DATA_REF_KEY:
11254         case BTRFS_SHARED_BLOCK_REF_KEY:
11255         case BTRFS_SHARED_DATA_REF_KEY:
11256                 ret = 0;
11257                 break;
11258         default:
11259                 error("extent[%llu %u %llu] has unknown ref type: %d",
11260                       key->objectid, key->type, key->offset, type);
11261                 ret = UNKNOWN_TYPE;
11262                 break;
11263         }
11264
11265         return ret;
11266 }
11267
11268 /*
11269  * Check backrefs of a tree block given by @bytenr or @eb.
11270  *
11271  * @root:       the root containing the @bytenr or @eb
11272  * @eb:         tree block extent buffer, can be NULL
11273  * @bytenr:     bytenr of the tree block to search
11274  * @level:      tree level of the tree block
11275  * @owner:      owner of the tree block
11276  *
11277  * Return >0 for any error found and output error message
11278  * Return 0 for no error found
11279  */
11280 static int check_tree_block_ref(struct btrfs_root *root,
11281                                 struct extent_buffer *eb, u64 bytenr,
11282                                 int level, u64 owner, struct node_refs *nrefs)
11283 {
11284         struct btrfs_key key;
11285         struct btrfs_root *extent_root = root->fs_info->extent_root;
11286         struct btrfs_path path;
11287         struct btrfs_extent_item *ei;
11288         struct btrfs_extent_inline_ref *iref;
11289         struct extent_buffer *leaf;
11290         unsigned long end;
11291         unsigned long ptr;
11292         int slot;
11293         int skinny_level;
11294         int root_level = btrfs_header_level(root->node);
11295         int type;
11296         u32 nodesize = root->fs_info->nodesize;
11297         u32 item_size;
11298         u64 offset;
11299         int found_ref = 0;
11300         int err = 0;
11301         int ret;
11302         int strict = 1;
11303         int parent = 0;
11304
11305         btrfs_init_path(&path);
11306         key.objectid = bytenr;
11307         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11308                 key.type = BTRFS_METADATA_ITEM_KEY;
11309         else
11310                 key.type = BTRFS_EXTENT_ITEM_KEY;
11311         key.offset = (u64)-1;
11312
11313         /* Search for the backref in extent tree */
11314         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11315         if (ret < 0) {
11316                 err |= BACKREF_MISSING;
11317                 goto out;
11318         }
11319         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11320         if (ret) {
11321                 err |= BACKREF_MISSING;
11322                 goto out;
11323         }
11324
11325         leaf = path.nodes[0];
11326         slot = path.slots[0];
11327         btrfs_item_key_to_cpu(leaf, &key, slot);
11328
11329         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11330
11331         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11332                 skinny_level = (int)key.offset;
11333                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11334         } else {
11335                 struct btrfs_tree_block_info *info;
11336
11337                 info = (struct btrfs_tree_block_info *)(ei + 1);
11338                 skinny_level = btrfs_tree_block_level(leaf, info);
11339                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11340         }
11341
11342
11343         if (eb) {
11344                 u64 header_gen;
11345                 u64 extent_gen;
11346
11347                 /*
11348                  * Due to the feature of shared tree blocks, if the upper node
11349                  * is a fs root or shared node, the extent of checked node may
11350                  * not be updated until the next CoW.
11351                  */
11352                 if (nrefs)
11353                         strict = should_check_extent_strictly(root, nrefs,
11354                                         level);
11355                 if (!(btrfs_extent_flags(leaf, ei) &
11356                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11357                         error(
11358                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11359                                 key.objectid, nodesize,
11360                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11361                         err = BACKREF_MISMATCH;
11362                 }
11363                 header_gen = btrfs_header_generation(eb);
11364                 extent_gen = btrfs_extent_generation(leaf, ei);
11365                 if (header_gen != extent_gen) {
11366                         error(
11367         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11368                                 key.objectid, nodesize, header_gen,
11369                                 extent_gen);
11370                         err = BACKREF_MISMATCH;
11371                 }
11372                 if (level != skinny_level) {
11373                         error(
11374                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11375                                 key.objectid, nodesize, level, skinny_level);
11376                         err = BACKREF_MISMATCH;
11377                 }
11378                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11379                         error(
11380                         "extent[%llu %u] is referred by other roots than %llu",
11381                                 key.objectid, nodesize, root->objectid);
11382                         err = BACKREF_MISMATCH;
11383                 }
11384         }
11385
11386         /*
11387          * Iterate the extent/metadata item to find the exact backref
11388          */
11389         item_size = btrfs_item_size_nr(leaf, slot);
11390         ptr = (unsigned long)iref;
11391         end = (unsigned long)ei + item_size;
11392
11393         while (ptr < end) {
11394                 iref = (struct btrfs_extent_inline_ref *)ptr;
11395                 type = btrfs_extent_inline_ref_type(leaf, iref);
11396                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11397
11398                 ret = check_extent_inline_ref(leaf, &key, iref);
11399                 if (ret) {
11400                         err |= ret;
11401                         break;
11402                 }
11403                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11404                         if (offset == root->objectid)
11405                                 found_ref = 1;
11406                         if (!strict && owner == offset)
11407                                 found_ref = 1;
11408                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11409                         /*
11410                          * Backref of tree reloc root points to itself, no need
11411                          * to check backref any more.
11412                          *
11413                          * This may be an error of loop backref, but extent tree
11414                          * checker should have already handled it.
11415                          * Here we only need to avoid infinite iteration.
11416                          */
11417                         if (offset == bytenr) {
11418                                 found_ref = 1;
11419                         } else {
11420                                 /*
11421                                  * Check if the backref points to valid
11422                                  * referencer
11423                                  */
11424                                 found_ref = !check_tree_block_ref( root, NULL,
11425                                                 offset, level + 1, owner,
11426                                                 NULL);
11427                         }
11428                 }
11429
11430                 if (found_ref)
11431                         break;
11432                 ptr += btrfs_extent_inline_ref_size(type);
11433         }
11434
11435         /*
11436          * Inlined extent item doesn't have what we need, check
11437          * TREE_BLOCK_REF_KEY
11438          */
11439         if (!found_ref) {
11440                 btrfs_release_path(&path);
11441                 key.objectid = bytenr;
11442                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11443                 key.offset = root->objectid;
11444
11445                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11446                 if (!ret)
11447                         found_ref = 1;
11448         }
11449         /*
11450          * Finally check SHARED BLOCK REF, any found will be good
11451          * Here we're not doing comprehensive extent backref checking,
11452          * only need to ensure there is some extent referring to this
11453          * tree block.
11454          */
11455         if (!found_ref) {
11456                 btrfs_release_path(&path);
11457                 key.objectid = bytenr;
11458                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11459                 key.offset = (u64)-1;
11460
11461                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11462                 if (ret < 0) {
11463                         err |= BACKREF_MISSING;
11464                         goto out;
11465                 }
11466                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11467                 if (ret) {
11468                         err |= BACKREF_MISSING;
11469                         goto out;
11470                 }
11471                 found_ref = 1;
11472         }
11473         if (!found_ref)
11474                 err |= BACKREF_MISSING;
11475 out:
11476         btrfs_release_path(&path);
11477         if (nrefs && strict &&
11478             level < root_level && nrefs->full_backref[level + 1])
11479                 parent = nrefs->bytenr[level + 1];
11480         if (eb && (err & BACKREF_MISSING))
11481                 error(
11482         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11483                       bytenr, nodesize, owner, level,
11484                       parent ? "parent" : "root",
11485                       parent ? parent : root->objectid);
11486         return err;
11487 }
11488
11489 /*
11490  * If @err contains BACKREF_MISSING then add extent of the
11491  * file_extent_data_item.
11492  *
11493  * Returns error bits after reapir.
11494  */
11495 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11496                                    struct btrfs_root *root,
11497                                    struct btrfs_path *pathp,
11498                                    struct node_refs *nrefs,
11499                                    int err)
11500 {
11501         struct btrfs_file_extent_item *fi;
11502         struct btrfs_key fi_key;
11503         struct btrfs_key key;
11504         struct btrfs_extent_item *ei;
11505         struct btrfs_path path;
11506         struct btrfs_root *extent_root = root->fs_info->extent_root;
11507         struct extent_buffer *eb;
11508         u64 size;
11509         u64 disk_bytenr;
11510         u64 num_bytes;
11511         u64 parent;
11512         u64 offset;
11513         u64 extent_offset;
11514         u64 file_offset;
11515         int generation;
11516         int slot;
11517         int ret = 0;
11518
11519         eb = pathp->nodes[0];
11520         slot = pathp->slots[0];
11521         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11522         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11523
11524         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11525             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11526                 return err;
11527
11528         file_offset = fi_key.offset;
11529         generation = btrfs_file_extent_generation(eb, fi);
11530         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11531         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11532         extent_offset = btrfs_file_extent_offset(eb, fi);
11533         offset = file_offset - extent_offset;
11534
11535         /* now repair only adds backref */
11536         if ((err & BACKREF_MISSING) == 0)
11537                 return err;
11538
11539         /* search extent item */
11540         key.objectid = disk_bytenr;
11541         key.type = BTRFS_EXTENT_ITEM_KEY;
11542         key.offset = num_bytes;
11543
11544         btrfs_init_path(&path);
11545         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11546         if (ret < 0) {
11547                 ret = -EIO;
11548                 goto out;
11549         }
11550
11551         /* insert an extent item */
11552         if (ret > 0) {
11553                 key.objectid = disk_bytenr;
11554                 key.type = BTRFS_EXTENT_ITEM_KEY;
11555                 key.offset = num_bytes;
11556                 size = sizeof(*ei);
11557
11558                 btrfs_release_path(&path);
11559                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11560                                               size);
11561                 if (ret)
11562                         goto out;
11563                 eb = path.nodes[0];
11564                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11565
11566                 btrfs_set_extent_refs(eb, ei, 0);
11567                 btrfs_set_extent_generation(eb, ei, generation);
11568                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11569
11570                 btrfs_mark_buffer_dirty(eb);
11571                 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11572                                                num_bytes, 1, 0);
11573                 btrfs_release_path(&path);
11574         }
11575
11576         if (nrefs->full_backref[0])
11577                 parent = btrfs_header_bytenr(eb);
11578         else
11579                 parent = 0;
11580
11581         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11582                                    root->objectid,
11583                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11584                                    offset);
11585         if (ret) {
11586                 error(
11587                 "failed to increase extent data backref[%llu %llu] root %llu",
11588                       disk_bytenr, num_bytes, root->objectid);
11589                 goto out;
11590         } else {
11591                 printf("Add one extent data backref [%llu %llu]\n",
11592                        disk_bytenr, num_bytes);
11593         }
11594
11595         err &= ~BACKREF_MISSING;
11596 out:
11597         if (ret)
11598                 error("can't repair root %llu extent data item[%llu %llu]",
11599                       root->objectid, disk_bytenr, num_bytes);
11600         return err;
11601 }
11602
11603 /*
11604  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11605  *
11606  * Return >0 any error found and output error message
11607  * Return 0 for no error found
11608  */
11609 static int check_extent_data_item(struct btrfs_root *root,
11610                                   struct btrfs_path *pathp,
11611                                   struct node_refs *nrefs,  int account_bytes)
11612 {
11613         struct btrfs_file_extent_item *fi;
11614         struct extent_buffer *eb = pathp->nodes[0];
11615         struct btrfs_path path;
11616         struct btrfs_root *extent_root = root->fs_info->extent_root;
11617         struct btrfs_key fi_key;
11618         struct btrfs_key dbref_key;
11619         struct extent_buffer *leaf;
11620         struct btrfs_extent_item *ei;
11621         struct btrfs_extent_inline_ref *iref;
11622         struct btrfs_extent_data_ref *dref;
11623         u64 owner;
11624         u64 disk_bytenr;
11625         u64 disk_num_bytes;
11626         u64 extent_num_bytes;
11627         u64 extent_flags;
11628         u64 offset;
11629         u32 item_size;
11630         unsigned long end;
11631         unsigned long ptr;
11632         int type;
11633         int found_dbackref = 0;
11634         int slot = pathp->slots[0];
11635         int err = 0;
11636         int ret;
11637         int strict;
11638
11639         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11640         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11641
11642         /* Nothing to check for hole and inline data extents */
11643         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11644             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11645                 return 0;
11646
11647         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11648         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11649         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11650         offset = btrfs_file_extent_offset(eb, fi);
11651
11652         /* Check unaligned disk_num_bytes and num_bytes */
11653         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11654                 error(
11655 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11656                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11657                         root->fs_info->sectorsize);
11658                 err |= BYTES_UNALIGNED;
11659         } else if (account_bytes) {
11660                 data_bytes_allocated += disk_num_bytes;
11661         }
11662         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11663                 error(
11664 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11665                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11666                         root->fs_info->sectorsize);
11667                 err |= BYTES_UNALIGNED;
11668         } else if (account_bytes) {
11669                 data_bytes_referenced += extent_num_bytes;
11670         }
11671         owner = btrfs_header_owner(eb);
11672
11673         /* Check the extent item of the file extent in extent tree */
11674         btrfs_init_path(&path);
11675         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11676         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11677         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11678
11679         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11680         if (ret)
11681                 goto out;
11682
11683         leaf = path.nodes[0];
11684         slot = path.slots[0];
11685         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11686
11687         extent_flags = btrfs_extent_flags(leaf, ei);
11688
11689         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11690                 error(
11691                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11692                     disk_bytenr, disk_num_bytes,
11693                     BTRFS_EXTENT_FLAG_DATA);
11694                 err |= BACKREF_MISMATCH;
11695         }
11696
11697         /* Check data backref inside that extent item */
11698         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11699         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11700         ptr = (unsigned long)iref;
11701         end = (unsigned long)ei + item_size;
11702         strict = should_check_extent_strictly(root, nrefs, -1);
11703
11704         while (ptr < end) {
11705                 u64 ref_root;
11706                 u64 ref_objectid;
11707                 u64 ref_offset;
11708                 bool match = false;
11709
11710                 iref = (struct btrfs_extent_inline_ref *)ptr;
11711                 type = btrfs_extent_inline_ref_type(leaf, iref);
11712                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11713
11714                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11715                 if (ret) {
11716                         err |= ret;
11717                         break;
11718                 }
11719                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11720                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11721                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11722                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
11723
11724                         if (ref_objectid == fi_key.objectid &&
11725                             ref_offset == fi_key.offset - offset)
11726                                 match = true;
11727                         if (ref_root == root->objectid && match)
11728                                 found_dbackref = 1;
11729                         else if (!strict && owner == ref_root && match)
11730                                 found_dbackref = 1;
11731                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11732                         found_dbackref = !check_tree_block_ref(root, NULL,
11733                                 btrfs_extent_inline_ref_offset(leaf, iref),
11734                                 0, owner, NULL);
11735                 }
11736
11737                 if (found_dbackref)
11738                         break;
11739                 ptr += btrfs_extent_inline_ref_size(type);
11740         }
11741
11742         if (!found_dbackref) {
11743                 btrfs_release_path(&path);
11744
11745                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11746                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11747                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11748                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11749                                 fi_key.objectid, fi_key.offset - offset);
11750
11751                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11752                                         &dbref_key, &path, 0, 0);
11753                 if (!ret) {
11754                         found_dbackref = 1;
11755                         goto out;
11756                 }
11757
11758                 btrfs_release_path(&path);
11759
11760                 /*
11761                  * Neither inlined nor EXTENT_DATA_REF found, try
11762                  * SHARED_DATA_REF as last chance.
11763                  */
11764                 dbref_key.objectid = disk_bytenr;
11765                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11766                 dbref_key.offset = eb->start;
11767
11768                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11769                                         &dbref_key, &path, 0, 0);
11770                 if (!ret) {
11771                         found_dbackref = 1;
11772                         goto out;
11773                 }
11774         }
11775
11776 out:
11777         if (!found_dbackref)
11778                 err |= BACKREF_MISSING;
11779         btrfs_release_path(&path);
11780         if (err & BACKREF_MISSING) {
11781                 error("data extent[%llu %llu] backref lost",
11782                       disk_bytenr, disk_num_bytes);
11783         }
11784         return err;
11785 }
11786
11787 /*
11788  * Get real tree block level for the case like shared block
11789  * Return >= 0 as tree level
11790  * Return <0 for error
11791  */
11792 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11793 {
11794         struct extent_buffer *eb;
11795         struct btrfs_path path;
11796         struct btrfs_key key;
11797         struct btrfs_extent_item *ei;
11798         u64 flags;
11799         u64 transid;
11800         u8 backref_level;
11801         u8 header_level;
11802         int ret;
11803
11804         /* Search extent tree for extent generation and level */
11805         key.objectid = bytenr;
11806         key.type = BTRFS_METADATA_ITEM_KEY;
11807         key.offset = (u64)-1;
11808
11809         btrfs_init_path(&path);
11810         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11811         if (ret < 0)
11812                 goto release_out;
11813         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11814         if (ret < 0)
11815                 goto release_out;
11816         if (ret > 0) {
11817                 ret = -ENOENT;
11818                 goto release_out;
11819         }
11820
11821         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11822         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11823                             struct btrfs_extent_item);
11824         flags = btrfs_extent_flags(path.nodes[0], ei);
11825         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11826                 ret = -ENOENT;
11827                 goto release_out;
11828         }
11829
11830         /* Get transid for later read_tree_block() check */
11831         transid = btrfs_extent_generation(path.nodes[0], ei);
11832
11833         /* Get backref level as one source */
11834         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11835                 backref_level = key.offset;
11836         } else {
11837                 struct btrfs_tree_block_info *info;
11838
11839                 info = (struct btrfs_tree_block_info *)(ei + 1);
11840                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11841         }
11842         btrfs_release_path(&path);
11843
11844         /* Get level from tree block as an alternative source */
11845         eb = read_tree_block(fs_info, bytenr, transid);
11846         if (!extent_buffer_uptodate(eb)) {
11847                 free_extent_buffer(eb);
11848                 return -EIO;
11849         }
11850         header_level = btrfs_header_level(eb);
11851         free_extent_buffer(eb);
11852
11853         if (header_level != backref_level)
11854                 return -EIO;
11855         return header_level;
11856
11857 release_out:
11858         btrfs_release_path(&path);
11859         return ret;
11860 }
11861
11862 /*
11863  * Check if a tree block backref is valid (points to a valid tree block)
11864  * if level == -1, level will be resolved
11865  * Return >0 for any error found and print error message
11866  */
11867 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11868                                     u64 bytenr, int level)
11869 {
11870         struct btrfs_root *root;
11871         struct btrfs_key key;
11872         struct btrfs_path path;
11873         struct extent_buffer *eb;
11874         struct extent_buffer *node;
11875         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11876         int err = 0;
11877         int ret;
11878
11879         /* Query level for level == -1 special case */
11880         if (level == -1)
11881                 level = query_tree_block_level(fs_info, bytenr);
11882         if (level < 0) {
11883                 err |= REFERENCER_MISSING;
11884                 goto out;
11885         }
11886
11887         key.objectid = root_id;
11888         key.type = BTRFS_ROOT_ITEM_KEY;
11889         key.offset = (u64)-1;
11890
11891         root = btrfs_read_fs_root(fs_info, &key);
11892         if (IS_ERR(root)) {
11893                 err |= REFERENCER_MISSING;
11894                 goto out;
11895         }
11896
11897         /* Read out the tree block to get item/node key */
11898         eb = read_tree_block(fs_info, bytenr, 0);
11899         if (!extent_buffer_uptodate(eb)) {
11900                 err |= REFERENCER_MISSING;
11901                 free_extent_buffer(eb);
11902                 goto out;
11903         }
11904
11905         /* Empty tree, no need to check key */
11906         if (!btrfs_header_nritems(eb) && !level) {
11907                 free_extent_buffer(eb);
11908                 goto out;
11909         }
11910
11911         if (level)
11912                 btrfs_node_key_to_cpu(eb, &key, 0);
11913         else
11914                 btrfs_item_key_to_cpu(eb, &key, 0);
11915
11916         free_extent_buffer(eb);
11917
11918         btrfs_init_path(&path);
11919         path.lowest_level = level;
11920         /* Search with the first key, to ensure we can reach it */
11921         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11922         if (ret < 0) {
11923                 err |= REFERENCER_MISSING;
11924                 goto release_out;
11925         }
11926
11927         node = path.nodes[level];
11928         if (btrfs_header_bytenr(node) != bytenr) {
11929                 error(
11930         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11931                         bytenr, nodesize, bytenr,
11932                         btrfs_header_bytenr(node));
11933                 err |= REFERENCER_MISMATCH;
11934         }
11935         if (btrfs_header_level(node) != level) {
11936                 error(
11937         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11938                         bytenr, nodesize, level,
11939                         btrfs_header_level(node));
11940                 err |= REFERENCER_MISMATCH;
11941         }
11942
11943 release_out:
11944         btrfs_release_path(&path);
11945 out:
11946         if (err & REFERENCER_MISSING) {
11947                 if (level < 0)
11948                         error("extent [%llu %d] lost referencer (owner: %llu)",
11949                                 bytenr, nodesize, root_id);
11950                 else
11951                         error(
11952                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11953                                 bytenr, nodesize, root_id, level);
11954         }
11955
11956         return err;
11957 }
11958
11959 /*
11960  * Check if tree block @eb is tree reloc root.
11961  * Return 0 if it's not or any problem happens
11962  * Return 1 if it's a tree reloc root
11963  */
11964 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11965                                  struct extent_buffer *eb)
11966 {
11967         struct btrfs_root *tree_reloc_root;
11968         struct btrfs_key key;
11969         u64 bytenr = btrfs_header_bytenr(eb);
11970         u64 owner = btrfs_header_owner(eb);
11971         int ret = 0;
11972
11973         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11974         key.offset = owner;
11975         key.type = BTRFS_ROOT_ITEM_KEY;
11976
11977         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11978         if (IS_ERR(tree_reloc_root))
11979                 return 0;
11980
11981         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11982                 ret = 1;
11983         btrfs_free_fs_root(tree_reloc_root);
11984         return ret;
11985 }
11986
11987 /*
11988  * Check referencer for shared block backref
11989  * If level == -1, this function will resolve the level.
11990  */
11991 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11992                                      u64 parent, u64 bytenr, int level)
11993 {
11994         struct extent_buffer *eb;
11995         u32 nr;
11996         int found_parent = 0;
11997         int i;
11998
11999         eb = read_tree_block(fs_info, parent, 0);
12000         if (!extent_buffer_uptodate(eb))
12001                 goto out;
12002
12003         if (level == -1)
12004                 level = query_tree_block_level(fs_info, bytenr);
12005         if (level < 0)
12006                 goto out;
12007
12008         /* It's possible it's a tree reloc root */
12009         if (parent == bytenr) {
12010                 if (is_tree_reloc_root(fs_info, eb))
12011                         found_parent = 1;
12012                 goto out;
12013         }
12014
12015         if (level + 1 != btrfs_header_level(eb))
12016                 goto out;
12017
12018         nr = btrfs_header_nritems(eb);
12019         for (i = 0; i < nr; i++) {
12020                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12021                         found_parent = 1;
12022                         break;
12023                 }
12024         }
12025 out:
12026         free_extent_buffer(eb);
12027         if (!found_parent) {
12028                 error(
12029         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12030                         bytenr, fs_info->nodesize, parent, level);
12031                 return REFERENCER_MISSING;
12032         }
12033         return 0;
12034 }
12035
12036 /*
12037  * Check referencer for normal (inlined) data ref
12038  * If len == 0, it will be resolved by searching in extent tree
12039  */
12040 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12041                                      u64 root_id, u64 objectid, u64 offset,
12042                                      u64 bytenr, u64 len, u32 count)
12043 {
12044         struct btrfs_root *root;
12045         struct btrfs_root *extent_root = fs_info->extent_root;
12046         struct btrfs_key key;
12047         struct btrfs_path path;
12048         struct extent_buffer *leaf;
12049         struct btrfs_file_extent_item *fi;
12050         u32 found_count = 0;
12051         int slot;
12052         int ret = 0;
12053
12054         if (!len) {
12055                 key.objectid = bytenr;
12056                 key.type = BTRFS_EXTENT_ITEM_KEY;
12057                 key.offset = (u64)-1;
12058
12059                 btrfs_init_path(&path);
12060                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12061                 if (ret < 0)
12062                         goto out;
12063                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12064                 if (ret)
12065                         goto out;
12066                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12067                 if (key.objectid != bytenr ||
12068                     key.type != BTRFS_EXTENT_ITEM_KEY)
12069                         goto out;
12070                 len = key.offset;
12071                 btrfs_release_path(&path);
12072         }
12073         key.objectid = root_id;
12074         key.type = BTRFS_ROOT_ITEM_KEY;
12075         key.offset = (u64)-1;
12076         btrfs_init_path(&path);
12077
12078         root = btrfs_read_fs_root(fs_info, &key);
12079         if (IS_ERR(root))
12080                 goto out;
12081
12082         key.objectid = objectid;
12083         key.type = BTRFS_EXTENT_DATA_KEY;
12084         /*
12085          * It can be nasty as data backref offset is
12086          * file offset - file extent offset, which is smaller or
12087          * equal to original backref offset.  The only special case is
12088          * overflow.  So we need to special check and do further search.
12089          */
12090         key.offset = offset & (1ULL << 63) ? 0 : offset;
12091
12092         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12093         if (ret < 0)
12094                 goto out;
12095
12096         /*
12097          * Search afterwards to get correct one
12098          * NOTE: As we must do a comprehensive check on the data backref to
12099          * make sure the dref count also matches, we must iterate all file
12100          * extents for that inode.
12101          */
12102         while (1) {
12103                 leaf = path.nodes[0];
12104                 slot = path.slots[0];
12105
12106                 if (slot >= btrfs_header_nritems(leaf) ||
12107                     btrfs_header_owner(leaf) != root_id)
12108                         goto next;
12109                 btrfs_item_key_to_cpu(leaf, &key, slot);
12110                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12111                         break;
12112                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12113                 /*
12114                  * Except normal disk bytenr and disk num bytes, we still
12115                  * need to do extra check on dbackref offset as
12116                  * dbackref offset = file_offset - file_extent_offset
12117                  *
12118                  * Also, we must check the leaf owner.
12119                  * In case of shared tree blocks (snapshots) we can inherit
12120                  * leaves from source snapshot.
12121                  * In that case, reference from source snapshot should not
12122                  * count.
12123                  */
12124                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12125                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12126                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12127                     offset && btrfs_header_owner(leaf) == root_id)
12128                         found_count++;
12129
12130 next:
12131                 ret = btrfs_next_item(root, &path);
12132                 if (ret)
12133                         break;
12134         }
12135 out:
12136         btrfs_release_path(&path);
12137         if (found_count != count) {
12138                 error(
12139 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12140                         bytenr, len, root_id, objectid, offset, count, found_count);
12141                 return REFERENCER_MISSING;
12142         }
12143         return 0;
12144 }
12145
12146 /*
12147  * Check if the referencer of a shared data backref exists
12148  */
12149 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12150                                      u64 parent, u64 bytenr)
12151 {
12152         struct extent_buffer *eb;
12153         struct btrfs_key key;
12154         struct btrfs_file_extent_item *fi;
12155         u32 nr;
12156         int found_parent = 0;
12157         int i;
12158
12159         eb = read_tree_block(fs_info, parent, 0);
12160         if (!extent_buffer_uptodate(eb))
12161                 goto out;
12162
12163         nr = btrfs_header_nritems(eb);
12164         for (i = 0; i < nr; i++) {
12165                 btrfs_item_key_to_cpu(eb, &key, i);
12166                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12167                         continue;
12168
12169                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12170                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12171                         continue;
12172
12173                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12174                         found_parent = 1;
12175                         break;
12176                 }
12177         }
12178
12179 out:
12180         free_extent_buffer(eb);
12181         if (!found_parent) {
12182                 error("shared extent %llu referencer lost (parent: %llu)",
12183                         bytenr, parent);
12184                 return REFERENCER_MISSING;
12185         }
12186         return 0;
12187 }
12188
12189 /*
12190  * Only delete backref if REFERENCER_MISSING now
12191  *
12192  * Returns <0   the extent was deleted
12193  * Returns >0   the backref was deleted but extent still exists, returned value
12194  *               means error after repair
12195  * Returns  0   nothing happened
12196  */
12197 static int repair_extent_item(struct btrfs_trans_handle *trans,
12198                       struct btrfs_root *root, struct btrfs_path *path,
12199                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12200                       u64 owner, u64 offset, int err)
12201 {
12202         struct btrfs_key old_key;
12203         int freed = 0;
12204         int ret;
12205
12206         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12207
12208         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12209                 /* delete the backref */
12210                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12211                           num_bytes, parent, root_objectid, owner, offset);
12212                 if (!ret) {
12213                         freed = 1;
12214                         err &= ~REFERENCER_MISSING;
12215                         printf("Delete backref in extent [%llu %llu]\n",
12216                                bytenr, num_bytes);
12217                 } else {
12218                         error("fail to delete backref in extent [%llu %llu]",
12219                                bytenr, num_bytes);
12220                 }
12221         }
12222
12223         /* btrfs_free_extent may delete the extent */
12224         btrfs_release_path(path);
12225         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12226
12227         if (ret)
12228                 ret = -ENOENT;
12229         else if (freed)
12230                 ret = err;
12231         return ret;
12232 }
12233
12234 /*
12235  * This function will check a given extent item, including its backref and
12236  * itself (like crossing stripe boundary and type)
12237  *
12238  * Since we don't use extent_record anymore, introduce new error bit
12239  */
12240 static int check_extent_item(struct btrfs_trans_handle *trans,
12241                              struct btrfs_fs_info *fs_info,
12242                              struct btrfs_path *path)
12243 {
12244         struct btrfs_extent_item *ei;
12245         struct btrfs_extent_inline_ref *iref;
12246         struct btrfs_extent_data_ref *dref;
12247         struct extent_buffer *eb = path->nodes[0];
12248         unsigned long end;
12249         unsigned long ptr;
12250         int slot = path->slots[0];
12251         int type;
12252         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12253         u32 item_size = btrfs_item_size_nr(eb, slot);
12254         u64 flags;
12255         u64 offset;
12256         u64 parent;
12257         u64 num_bytes;
12258         u64 root_objectid;
12259         u64 owner;
12260         u64 owner_offset;
12261         int metadata = 0;
12262         int level;
12263         struct btrfs_key key;
12264         int ret;
12265         int err = 0;
12266
12267         btrfs_item_key_to_cpu(eb, &key, slot);
12268         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12269                 bytes_used += key.offset;
12270                 num_bytes = key.offset;
12271         } else {
12272                 bytes_used += nodesize;
12273                 num_bytes = nodesize;
12274         }
12275
12276         if (item_size < sizeof(*ei)) {
12277                 /*
12278                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12279                  * old thing when on disk format is still un-determined.
12280                  * No need to care about it anymore
12281                  */
12282                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12283                 return -ENOTTY;
12284         }
12285
12286         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12287         flags = btrfs_extent_flags(eb, ei);
12288
12289         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12290                 metadata = 1;
12291         if (metadata && check_crossing_stripes(global_info, key.objectid,
12292                                                eb->len)) {
12293                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12294                       key.objectid, key.objectid + nodesize);
12295                 err |= CROSSING_STRIPE_BOUNDARY;
12296         }
12297
12298         ptr = (unsigned long)(ei + 1);
12299
12300         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12301                 /* Old EXTENT_ITEM metadata */
12302                 struct btrfs_tree_block_info *info;
12303
12304                 info = (struct btrfs_tree_block_info *)ptr;
12305                 level = btrfs_tree_block_level(eb, info);
12306                 ptr += sizeof(struct btrfs_tree_block_info);
12307         } else {
12308                 /* New METADATA_ITEM */
12309                 level = key.offset;
12310         }
12311         end = (unsigned long)ei + item_size;
12312
12313 next:
12314         /* Reached extent item end normally */
12315         if (ptr == end)
12316                 goto out;
12317
12318         /* Beyond extent item end, wrong item size */
12319         if (ptr > end) {
12320                 err |= ITEM_SIZE_MISMATCH;
12321                 error("extent item at bytenr %llu slot %d has wrong size",
12322                         eb->start, slot);
12323                 goto out;
12324         }
12325
12326         parent = 0;
12327         root_objectid = 0;
12328         owner = 0;
12329         owner_offset = 0;
12330         /* Now check every backref in this extent item */
12331         iref = (struct btrfs_extent_inline_ref *)ptr;
12332         type = btrfs_extent_inline_ref_type(eb, iref);
12333         offset = btrfs_extent_inline_ref_offset(eb, iref);
12334         switch (type) {
12335         case BTRFS_TREE_BLOCK_REF_KEY:
12336                 root_objectid = offset;
12337                 owner = level;
12338                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12339                                                level);
12340                 err |= ret;
12341                 break;
12342         case BTRFS_SHARED_BLOCK_REF_KEY:
12343                 parent = offset;
12344                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12345                                                  level);
12346                 err |= ret;
12347                 break;
12348         case BTRFS_EXTENT_DATA_REF_KEY:
12349                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12350                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12351                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12352                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12353                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12354                                         owner_offset, key.objectid, key.offset,
12355                                         btrfs_extent_data_ref_count(eb, dref));
12356                 err |= ret;
12357                 break;
12358         case BTRFS_SHARED_DATA_REF_KEY:
12359                 parent = offset;
12360                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12361                 err |= ret;
12362                 break;
12363         default:
12364                 error("extent[%llu %d %llu] has unknown ref type: %d",
12365                         key.objectid, key.type, key.offset, type);
12366                 ret = UNKNOWN_TYPE;
12367                 err |= ret;
12368                 goto out;
12369         }
12370
12371         if (err && repair) {
12372                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12373                          key.objectid, num_bytes, parent, root_objectid,
12374                          owner, owner_offset, ret);
12375                 if (ret < 0)
12376                         goto out;
12377                 if (ret) {
12378                         goto next;
12379                         err = ret;
12380                 }
12381         }
12382
12383         ptr += btrfs_extent_inline_ref_size(type);
12384         goto next;
12385
12386 out:
12387         return err;
12388 }
12389
12390 /*
12391  * Check if a dev extent item is referred correctly by its chunk
12392  */
12393 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12394                                  struct extent_buffer *eb, int slot)
12395 {
12396         struct btrfs_root *chunk_root = fs_info->chunk_root;
12397         struct btrfs_dev_extent *ptr;
12398         struct btrfs_path path;
12399         struct btrfs_key chunk_key;
12400         struct btrfs_key devext_key;
12401         struct btrfs_chunk *chunk;
12402         struct extent_buffer *l;
12403         int num_stripes;
12404         u64 length;
12405         int i;
12406         int found_chunk = 0;
12407         int ret;
12408
12409         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12410         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12411         length = btrfs_dev_extent_length(eb, ptr);
12412
12413         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12414         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12415         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12416
12417         btrfs_init_path(&path);
12418         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12419         if (ret)
12420                 goto out;
12421
12422         l = path.nodes[0];
12423         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12424         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12425                                       chunk_key.offset);
12426         if (ret < 0)
12427                 goto out;
12428
12429         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12430                 goto out;
12431
12432         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12433         for (i = 0; i < num_stripes; i++) {
12434                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12435                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12436
12437                 if (devid == devext_key.objectid &&
12438                     offset == devext_key.offset) {
12439                         found_chunk = 1;
12440                         break;
12441                 }
12442         }
12443 out:
12444         btrfs_release_path(&path);
12445         if (!found_chunk) {
12446                 error(
12447                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12448                         devext_key.objectid, devext_key.offset, length);
12449                 return REFERENCER_MISSING;
12450         }
12451         return 0;
12452 }
12453
12454 /*
12455  * Check if the used space is correct with the dev item
12456  */
12457 static int check_dev_item(struct btrfs_fs_info *fs_info,
12458                           struct extent_buffer *eb, int slot)
12459 {
12460         struct btrfs_root *dev_root = fs_info->dev_root;
12461         struct btrfs_dev_item *dev_item;
12462         struct btrfs_path path;
12463         struct btrfs_key key;
12464         struct btrfs_dev_extent *ptr;
12465         u64 total_bytes;
12466         u64 dev_id;
12467         u64 used;
12468         u64 total = 0;
12469         int ret;
12470
12471         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12472         dev_id = btrfs_device_id(eb, dev_item);
12473         used = btrfs_device_bytes_used(eb, dev_item);
12474         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12475
12476         key.objectid = dev_id;
12477         key.type = BTRFS_DEV_EXTENT_KEY;
12478         key.offset = 0;
12479
12480         btrfs_init_path(&path);
12481         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12482         if (ret < 0) {
12483                 btrfs_item_key_to_cpu(eb, &key, slot);
12484                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12485                         key.objectid, key.type, key.offset);
12486                 btrfs_release_path(&path);
12487                 return REFERENCER_MISSING;
12488         }
12489
12490         /* Iterate dev_extents to calculate the used space of a device */
12491         while (1) {
12492                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12493                         goto next;
12494
12495                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12496                 if (key.objectid > dev_id)
12497                         break;
12498                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12499                         goto next;
12500
12501                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12502                                      struct btrfs_dev_extent);
12503                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12504 next:
12505                 ret = btrfs_next_item(dev_root, &path);
12506                 if (ret)
12507                         break;
12508         }
12509         btrfs_release_path(&path);
12510
12511         if (used != total) {
12512                 btrfs_item_key_to_cpu(eb, &key, slot);
12513                 error(
12514 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12515                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12516                         BTRFS_DEV_EXTENT_KEY, dev_id);
12517                 return ACCOUNTING_MISMATCH;
12518         }
12519         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12520
12521         return 0;
12522 }
12523
12524 /*
12525  * Check a block group item with its referener (chunk) and its used space
12526  * with extent/metadata item
12527  */
12528 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12529                                   struct extent_buffer *eb, int slot)
12530 {
12531         struct btrfs_root *extent_root = fs_info->extent_root;
12532         struct btrfs_root *chunk_root = fs_info->chunk_root;
12533         struct btrfs_block_group_item *bi;
12534         struct btrfs_block_group_item bg_item;
12535         struct btrfs_path path;
12536         struct btrfs_key bg_key;
12537         struct btrfs_key chunk_key;
12538         struct btrfs_key extent_key;
12539         struct btrfs_chunk *chunk;
12540         struct extent_buffer *leaf;
12541         struct btrfs_extent_item *ei;
12542         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12543         u64 flags;
12544         u64 bg_flags;
12545         u64 used;
12546         u64 total = 0;
12547         int ret;
12548         int err = 0;
12549
12550         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12551         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12552         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12553         used = btrfs_block_group_used(&bg_item);
12554         bg_flags = btrfs_block_group_flags(&bg_item);
12555
12556         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12557         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12558         chunk_key.offset = bg_key.objectid;
12559
12560         btrfs_init_path(&path);
12561         /* Search for the referencer chunk */
12562         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12563         if (ret) {
12564                 error(
12565                 "block group[%llu %llu] did not find the related chunk item",
12566                         bg_key.objectid, bg_key.offset);
12567                 err |= REFERENCER_MISSING;
12568         } else {
12569                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12570                                         struct btrfs_chunk);
12571                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12572                                                 bg_key.offset) {
12573                         error(
12574         "block group[%llu %llu] related chunk item length does not match",
12575                                 bg_key.objectid, bg_key.offset);
12576                         err |= REFERENCER_MISMATCH;
12577                 }
12578         }
12579         btrfs_release_path(&path);
12580
12581         /* Search from the block group bytenr */
12582         extent_key.objectid = bg_key.objectid;
12583         extent_key.type = 0;
12584         extent_key.offset = 0;
12585
12586         btrfs_init_path(&path);
12587         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12588         if (ret < 0)
12589                 goto out;
12590
12591         /* Iterate extent tree to account used space */
12592         while (1) {
12593                 leaf = path.nodes[0];
12594
12595                 /* Search slot can point to the last item beyond leaf nritems */
12596                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12597                         goto next;
12598
12599                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12600                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12601                         break;
12602
12603                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12604                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12605                         goto next;
12606                 if (extent_key.objectid < bg_key.objectid)
12607                         goto next;
12608
12609                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12610                         total += nodesize;
12611                 else
12612                         total += extent_key.offset;
12613
12614                 ei = btrfs_item_ptr(leaf, path.slots[0],
12615                                     struct btrfs_extent_item);
12616                 flags = btrfs_extent_flags(leaf, ei);
12617                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12618                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12619                                 error(
12620                         "bad extent[%llu, %llu) type mismatch with chunk",
12621                                         extent_key.objectid,
12622                                         extent_key.objectid + extent_key.offset);
12623                                 err |= CHUNK_TYPE_MISMATCH;
12624                         }
12625                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12626                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12627                                     BTRFS_BLOCK_GROUP_METADATA))) {
12628                                 error(
12629                         "bad extent[%llu, %llu) type mismatch with chunk",
12630                                         extent_key.objectid,
12631                                         extent_key.objectid + nodesize);
12632                                 err |= CHUNK_TYPE_MISMATCH;
12633                         }
12634                 }
12635 next:
12636                 ret = btrfs_next_item(extent_root, &path);
12637                 if (ret)
12638                         break;
12639         }
12640
12641 out:
12642         btrfs_release_path(&path);
12643
12644         if (total != used) {
12645                 error(
12646                 "block group[%llu %llu] used %llu but extent items used %llu",
12647                         bg_key.objectid, bg_key.offset, used, total);
12648                 err |= BG_ACCOUNTING_ERROR;
12649         }
12650         return err;
12651 }
12652
12653 /*
12654  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12655  * FIXME: We still need to repair error of dev_item.
12656  *
12657  * Returns error after repair.
12658  */
12659 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12660                              struct btrfs_root *chunk_root,
12661                              struct btrfs_path *path, int err)
12662 {
12663         struct btrfs_chunk *chunk;
12664         struct btrfs_key chunk_key;
12665         struct extent_buffer *eb = path->nodes[0];
12666         u64 length;
12667         int slot = path->slots[0];
12668         u64 type;
12669         int ret = 0;
12670
12671         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12672         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12673                 return err;
12674         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12675         type = btrfs_chunk_type(path->nodes[0], chunk);
12676         length = btrfs_chunk_length(eb, chunk);
12677
12678         if (err & REFERENCER_MISSING) {
12679                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12680                                              type, chunk_key.offset, length);
12681                 if (ret) {
12682                         error("fail to add block group item[%llu %llu]",
12683                               chunk_key.offset, length);
12684                         goto out;
12685                 } else {
12686                         err &= ~REFERENCER_MISSING;
12687                         printf("Added block group item[%llu %llu]\n",
12688                                chunk_key.offset, length);
12689                 }
12690         }
12691
12692 out:
12693         return err;
12694 }
12695
12696 /*
12697  * Check a chunk item.
12698  * Including checking all referred dev_extents and block group
12699  */
12700 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12701                             struct extent_buffer *eb, int slot)
12702 {
12703         struct btrfs_root *extent_root = fs_info->extent_root;
12704         struct btrfs_root *dev_root = fs_info->dev_root;
12705         struct btrfs_path path;
12706         struct btrfs_key chunk_key;
12707         struct btrfs_key bg_key;
12708         struct btrfs_key devext_key;
12709         struct btrfs_chunk *chunk;
12710         struct extent_buffer *leaf;
12711         struct btrfs_block_group_item *bi;
12712         struct btrfs_block_group_item bg_item;
12713         struct btrfs_dev_extent *ptr;
12714         u64 length;
12715         u64 chunk_end;
12716         u64 stripe_len;
12717         u64 type;
12718         int num_stripes;
12719         u64 offset;
12720         u64 objectid;
12721         int i;
12722         int ret;
12723         int err = 0;
12724
12725         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12726         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12727         length = btrfs_chunk_length(eb, chunk);
12728         chunk_end = chunk_key.offset + length;
12729         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12730                                       chunk_key.offset);
12731         if (ret < 0) {
12732                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12733                         chunk_end);
12734                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12735                 goto out;
12736         }
12737         type = btrfs_chunk_type(eb, chunk);
12738
12739         bg_key.objectid = chunk_key.offset;
12740         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12741         bg_key.offset = length;
12742
12743         btrfs_init_path(&path);
12744         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12745         if (ret) {
12746                 error(
12747                 "chunk[%llu %llu) did not find the related block group item",
12748                         chunk_key.offset, chunk_end);
12749                 err |= REFERENCER_MISSING;
12750         } else{
12751                 leaf = path.nodes[0];
12752                 bi = btrfs_item_ptr(leaf, path.slots[0],
12753                                     struct btrfs_block_group_item);
12754                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12755                                    sizeof(bg_item));
12756                 if (btrfs_block_group_flags(&bg_item) != type) {
12757                         error(
12758 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12759                                 chunk_key.offset, chunk_end, type,
12760                                 btrfs_block_group_flags(&bg_item));
12761                         err |= REFERENCER_MISSING;
12762                 }
12763         }
12764
12765         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12766         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12767         for (i = 0; i < num_stripes; i++) {
12768                 btrfs_release_path(&path);
12769                 btrfs_init_path(&path);
12770                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12771                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12772                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12773
12774                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12775                                         0, 0);
12776                 if (ret)
12777                         goto not_match_dev;
12778
12779                 leaf = path.nodes[0];
12780                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12781                                      struct btrfs_dev_extent);
12782                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12783                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12784                 if (objectid != chunk_key.objectid ||
12785                     offset != chunk_key.offset ||
12786                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12787                         goto not_match_dev;
12788                 continue;
12789 not_match_dev:
12790                 err |= BACKREF_MISSING;
12791                 error(
12792                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12793                         chunk_key.objectid, chunk_end, i);
12794                 continue;
12795         }
12796         btrfs_release_path(&path);
12797 out:
12798         return err;
12799 }
12800
12801 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12802                                    struct btrfs_root *root,
12803                                    struct btrfs_path *path)
12804 {
12805         struct btrfs_key key;
12806         int ret = 0;
12807
12808         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12809         btrfs_release_path(path);
12810         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12811         if (ret) {
12812                 ret = -ENOENT;
12813                 goto out;
12814         }
12815
12816         ret = btrfs_del_item(trans, root, path);
12817         if (ret)
12818                 goto out;
12819
12820         if (path->slots[0] == 0)
12821                 btrfs_prev_leaf(root, path);
12822         else
12823                 path->slots[0]--;
12824 out:
12825         if (ret)
12826                 error("failed to delete root %llu item[%llu, %u, %llu]",
12827                       root->objectid, key.objectid, key.type, key.offset);
12828         else
12829                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12830                        root->objectid, key.objectid, key.type, key.offset);
12831         return ret;
12832 }
12833
12834 /*
12835  * Main entry function to check known items and update related accounting info
12836  */
12837 static int check_leaf_items(struct btrfs_trans_handle *trans,
12838                             struct btrfs_root *root, struct btrfs_path *path,
12839                             struct node_refs *nrefs, int account_bytes)
12840 {
12841         struct btrfs_fs_info *fs_info = root->fs_info;
12842         struct btrfs_key key;
12843         struct extent_buffer *eb;
12844         int slot;
12845         int type;
12846         struct btrfs_extent_data_ref *dref;
12847         int ret = 0;
12848         int err = 0;
12849
12850 again:
12851         eb = path->nodes[0];
12852         slot = path->slots[0];
12853         if (slot >= btrfs_header_nritems(eb)) {
12854                 if (slot == 0) {
12855                         error("empty leaf [%llu %u] root %llu", eb->start,
12856                                 root->fs_info->nodesize, root->objectid);
12857                         err |= EIO;
12858                 }
12859                 goto out;
12860         }
12861
12862         btrfs_item_key_to_cpu(eb, &key, slot);
12863         type = key.type;
12864
12865         switch (type) {
12866         case BTRFS_EXTENT_DATA_KEY:
12867                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12868                 if (repair && ret)
12869                         ret = repair_extent_data_item(trans, root, path, nrefs,
12870                                                       ret);
12871                 err |= ret;
12872                 break;
12873         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12874                 ret = check_block_group_item(fs_info, eb, slot);
12875                 if (repair &&
12876                     ret & REFERENCER_MISSING)
12877                         ret = delete_extent_tree_item(trans, root, path);
12878                 err |= ret;
12879                 break;
12880         case BTRFS_DEV_ITEM_KEY:
12881                 ret = check_dev_item(fs_info, eb, slot);
12882                 err |= ret;
12883                 break;
12884         case BTRFS_CHUNK_ITEM_KEY:
12885                 ret = check_chunk_item(fs_info, eb, slot);
12886                 if (repair && ret)
12887                         ret = repair_chunk_item(trans, root, path, ret);
12888                 err |= ret;
12889                 break;
12890         case BTRFS_DEV_EXTENT_KEY:
12891                 ret = check_dev_extent_item(fs_info, eb, slot);
12892                 err |= ret;
12893                 break;
12894         case BTRFS_EXTENT_ITEM_KEY:
12895         case BTRFS_METADATA_ITEM_KEY:
12896                 ret = check_extent_item(trans, fs_info, path);
12897                 err |= ret;
12898                 break;
12899         case BTRFS_EXTENT_CSUM_KEY:
12900                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12901                 err |= ret;
12902                 break;
12903         case BTRFS_TREE_BLOCK_REF_KEY:
12904                 ret = check_tree_block_backref(fs_info, key.offset,
12905                                                key.objectid, -1);
12906                 if (repair &&
12907                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12908                         ret = delete_extent_tree_item(trans, root, path);
12909                 err |= ret;
12910                 break;
12911         case BTRFS_EXTENT_DATA_REF_KEY:
12912                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12913                 ret = check_extent_data_backref(fs_info,
12914                                 btrfs_extent_data_ref_root(eb, dref),
12915                                 btrfs_extent_data_ref_objectid(eb, dref),
12916                                 btrfs_extent_data_ref_offset(eb, dref),
12917                                 key.objectid, 0,
12918                                 btrfs_extent_data_ref_count(eb, dref));
12919                 if (repair &&
12920                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12921                         ret = delete_extent_tree_item(trans, root, path);
12922                 err |= ret;
12923                 break;
12924         case BTRFS_SHARED_BLOCK_REF_KEY:
12925                 ret = check_shared_block_backref(fs_info, key.offset,
12926                                                  key.objectid, -1);
12927                 if (repair &&
12928                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12929                         ret = delete_extent_tree_item(trans, root, path);
12930                 err |= ret;
12931                 break;
12932         case BTRFS_SHARED_DATA_REF_KEY:
12933                 ret = check_shared_data_backref(fs_info, key.offset,
12934                                                 key.objectid);
12935                 if (repair &&
12936                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12937                         ret = delete_extent_tree_item(trans, root, path);
12938                 err |= ret;
12939                 break;
12940         default:
12941                 break;
12942         }
12943
12944         ++path->slots[0];
12945         goto again;
12946 out:
12947         return err;
12948 }
12949
12950 /*
12951  * Low memory usage version check_chunks_and_extents.
12952  */
12953 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12954 {
12955         struct btrfs_trans_handle *trans = NULL;
12956         struct btrfs_path path;
12957         struct btrfs_key old_key;
12958         struct btrfs_key key;
12959         struct btrfs_root *root1;
12960         struct btrfs_root *root;
12961         struct btrfs_root *cur_root;
12962         int err = 0;
12963         int ret;
12964
12965         root = fs_info->fs_root;
12966
12967         if (repair) {
12968                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12969                 if (IS_ERR(trans)) {
12970                         error("failed to start transaction before check");
12971                         return PTR_ERR(trans);
12972                 }
12973         }
12974
12975         root1 = root->fs_info->chunk_root;
12976         ret = check_btrfs_root(trans, root1, 0, 1);
12977         err |= ret;
12978
12979         root1 = root->fs_info->tree_root;
12980         ret = check_btrfs_root(trans, root1, 0, 1);
12981         err |= ret;
12982
12983         btrfs_init_path(&path);
12984         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12985         key.offset = 0;
12986         key.type = BTRFS_ROOT_ITEM_KEY;
12987
12988         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12989         if (ret) {
12990                 error("cannot find extent tree in tree_root");
12991                 goto out;
12992         }
12993
12994         while (1) {
12995                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12996                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12997                         goto next;
12998                 old_key = key;
12999                 key.offset = (u64)-1;
13000
13001                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13002                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13003                                         &key);
13004                 else
13005                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
13006                 if (IS_ERR(cur_root) || !cur_root) {
13007                         error("failed to read tree: %lld", key.objectid);
13008                         goto next;
13009                 }
13010
13011                 ret = check_btrfs_root(trans, cur_root, 0, 1);
13012                 err |= ret;
13013
13014                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13015                         btrfs_free_fs_root(cur_root);
13016
13017                 btrfs_release_path(&path);
13018                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13019                                         &old_key, &path, 0, 0);
13020                 if (ret)
13021                         goto out;
13022 next:
13023                 ret = btrfs_next_item(root1, &path);
13024                 if (ret)
13025                         goto out;
13026         }
13027 out:
13028
13029         /* if repair, update block accounting */
13030         if (repair) {
13031                 ret = btrfs_fix_block_accounting(trans, root);
13032                 if (ret)
13033                         err |= ret;
13034                 else
13035                         err &= ~BG_ACCOUNTING_ERROR;
13036         }
13037
13038         if (trans)
13039                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13040
13041         btrfs_release_path(&path);
13042
13043         return err;
13044 }
13045
13046 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13047 {
13048         int ret;
13049
13050         if (!ctx.progress_enabled)
13051                 fprintf(stderr, "checking extents\n");
13052         if (check_mode == CHECK_MODE_LOWMEM)
13053                 ret = check_chunks_and_extents_v2(fs_info);
13054         else
13055                 ret = check_chunks_and_extents(fs_info);
13056
13057         /* Also repair device size related problems */
13058         if (repair && !ret) {
13059                 ret = btrfs_fix_device_and_super_size(fs_info);
13060                 if (ret > 0)
13061                         ret = 0;
13062         }
13063         return ret;
13064 }
13065
13066 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13067                            struct btrfs_root *root, int overwrite)
13068 {
13069         struct extent_buffer *c;
13070         struct extent_buffer *old = root->node;
13071         int level;
13072         int ret;
13073         struct btrfs_disk_key disk_key = {0,0,0};
13074
13075         level = 0;
13076
13077         if (overwrite) {
13078                 c = old;
13079                 extent_buffer_get(c);
13080                 goto init;
13081         }
13082         c = btrfs_alloc_free_block(trans, root,
13083                                    root->fs_info->nodesize,
13084                                    root->root_key.objectid,
13085                                    &disk_key, level, 0, 0);
13086         if (IS_ERR(c)) {
13087                 c = old;
13088                 extent_buffer_get(c);
13089                 overwrite = 1;
13090         }
13091 init:
13092         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13093         btrfs_set_header_level(c, level);
13094         btrfs_set_header_bytenr(c, c->start);
13095         btrfs_set_header_generation(c, trans->transid);
13096         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13097         btrfs_set_header_owner(c, root->root_key.objectid);
13098
13099         write_extent_buffer(c, root->fs_info->fsid,
13100                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13101
13102         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13103                             btrfs_header_chunk_tree_uuid(c),
13104                             BTRFS_UUID_SIZE);
13105
13106         btrfs_mark_buffer_dirty(c);
13107         /*
13108          * this case can happen in the following case:
13109          *
13110          * 1.overwrite previous root.
13111          *
13112          * 2.reinit reloc data root, this is because we skip pin
13113          * down reloc data tree before which means we can allocate
13114          * same block bytenr here.
13115          */
13116         if (old->start == c->start) {
13117                 btrfs_set_root_generation(&root->root_item,
13118                                           trans->transid);
13119                 root->root_item.level = btrfs_header_level(root->node);
13120                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13121                                         &root->root_key, &root->root_item);
13122                 if (ret) {
13123                         free_extent_buffer(c);
13124                         return ret;
13125                 }
13126         }
13127         free_extent_buffer(old);
13128         root->node = c;
13129         add_root_to_dirty_list(root);
13130         return 0;
13131 }
13132
13133 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13134                                 struct extent_buffer *eb, int tree_root)
13135 {
13136         struct extent_buffer *tmp;
13137         struct btrfs_root_item *ri;
13138         struct btrfs_key key;
13139         u64 bytenr;
13140         int level = btrfs_header_level(eb);
13141         int nritems;
13142         int ret;
13143         int i;
13144
13145         /*
13146          * If we have pinned this block before, don't pin it again.
13147          * This can not only avoid forever loop with broken filesystem
13148          * but also give us some speedups.
13149          */
13150         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13151                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13152                 return 0;
13153
13154         btrfs_pin_extent(fs_info, eb->start, eb->len);
13155
13156         nritems = btrfs_header_nritems(eb);
13157         for (i = 0; i < nritems; i++) {
13158                 if (level == 0) {
13159                         btrfs_item_key_to_cpu(eb, &key, i);
13160                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13161                                 continue;
13162                         /* Skip the extent root and reloc roots */
13163                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13164                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13165                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13166                                 continue;
13167                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13168                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13169
13170                         /*
13171                          * If at any point we start needing the real root we
13172                          * will have to build a stump root for the root we are
13173                          * in, but for now this doesn't actually use the root so
13174                          * just pass in extent_root.
13175                          */
13176                         tmp = read_tree_block(fs_info, bytenr, 0);
13177                         if (!extent_buffer_uptodate(tmp)) {
13178                                 fprintf(stderr, "Error reading root block\n");
13179                                 return -EIO;
13180                         }
13181                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13182                         free_extent_buffer(tmp);
13183                         if (ret)
13184                                 return ret;
13185                 } else {
13186                         bytenr = btrfs_node_blockptr(eb, i);
13187
13188                         /* If we aren't the tree root don't read the block */
13189                         if (level == 1 && !tree_root) {
13190                                 btrfs_pin_extent(fs_info, bytenr,
13191                                                 fs_info->nodesize);
13192                                 continue;
13193                         }
13194
13195                         tmp = read_tree_block(fs_info, bytenr, 0);
13196                         if (!extent_buffer_uptodate(tmp)) {
13197                                 fprintf(stderr, "Error reading tree block\n");
13198                                 return -EIO;
13199                         }
13200                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13201                         free_extent_buffer(tmp);
13202                         if (ret)
13203                                 return ret;
13204                 }
13205         }
13206
13207         return 0;
13208 }
13209
13210 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13211 {
13212         int ret;
13213
13214         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13215         if (ret)
13216                 return ret;
13217
13218         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13219 }
13220
13221 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13222 {
13223         struct btrfs_block_group_cache *cache;
13224         struct btrfs_path path;
13225         struct extent_buffer *leaf;
13226         struct btrfs_chunk *chunk;
13227         struct btrfs_key key;
13228         int ret;
13229         u64 start;
13230
13231         btrfs_init_path(&path);
13232         key.objectid = 0;
13233         key.type = BTRFS_CHUNK_ITEM_KEY;
13234         key.offset = 0;
13235         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13236         if (ret < 0) {
13237                 btrfs_release_path(&path);
13238                 return ret;
13239         }
13240
13241         /*
13242          * We do this in case the block groups were screwed up and had alloc
13243          * bits that aren't actually set on the chunks.  This happens with
13244          * restored images every time and could happen in real life I guess.
13245          */
13246         fs_info->avail_data_alloc_bits = 0;
13247         fs_info->avail_metadata_alloc_bits = 0;
13248         fs_info->avail_system_alloc_bits = 0;
13249
13250         /* First we need to create the in-memory block groups */
13251         while (1) {
13252                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13253                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13254                         if (ret < 0) {
13255                                 btrfs_release_path(&path);
13256                                 return ret;
13257                         }
13258                         if (ret) {
13259                                 ret = 0;
13260                                 break;
13261                         }
13262                 }
13263                 leaf = path.nodes[0];
13264                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13265                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13266                         path.slots[0]++;
13267                         continue;
13268                 }
13269
13270                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13271                 btrfs_add_block_group(fs_info, 0,
13272                                       btrfs_chunk_type(leaf, chunk), key.offset,
13273                                       btrfs_chunk_length(leaf, chunk));
13274                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13275                                  key.offset + btrfs_chunk_length(leaf, chunk));
13276                 path.slots[0]++;
13277         }
13278         start = 0;
13279         while (1) {
13280                 cache = btrfs_lookup_first_block_group(fs_info, start);
13281                 if (!cache)
13282                         break;
13283                 cache->cached = 1;
13284                 start = cache->key.objectid + cache->key.offset;
13285         }
13286
13287         btrfs_release_path(&path);
13288         return 0;
13289 }
13290
13291 static int reset_balance(struct btrfs_trans_handle *trans,
13292                          struct btrfs_fs_info *fs_info)
13293 {
13294         struct btrfs_root *root = fs_info->tree_root;
13295         struct btrfs_path path;
13296         struct extent_buffer *leaf;
13297         struct btrfs_key key;
13298         int del_slot, del_nr = 0;
13299         int ret;
13300         int found = 0;
13301
13302         btrfs_init_path(&path);
13303         key.objectid = BTRFS_BALANCE_OBJECTID;
13304         key.type = BTRFS_BALANCE_ITEM_KEY;
13305         key.offset = 0;
13306         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13307         if (ret) {
13308                 if (ret > 0)
13309                         ret = 0;
13310                 if (!ret)
13311                         goto reinit_data_reloc;
13312                 else
13313                         goto out;
13314         }
13315
13316         ret = btrfs_del_item(trans, root, &path);
13317         if (ret)
13318                 goto out;
13319         btrfs_release_path(&path);
13320
13321         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13322         key.type = BTRFS_ROOT_ITEM_KEY;
13323         key.offset = 0;
13324         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13325         if (ret < 0)
13326                 goto out;
13327         while (1) {
13328                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13329                         if (!found)
13330                                 break;
13331
13332                         if (del_nr) {
13333                                 ret = btrfs_del_items(trans, root, &path,
13334                                                       del_slot, del_nr);
13335                                 del_nr = 0;
13336                                 if (ret)
13337                                         goto out;
13338                         }
13339                         key.offset++;
13340                         btrfs_release_path(&path);
13341
13342                         found = 0;
13343                         ret = btrfs_search_slot(trans, root, &key, &path,
13344                                                 -1, 1);
13345                         if (ret < 0)
13346                                 goto out;
13347                         continue;
13348                 }
13349                 found = 1;
13350                 leaf = path.nodes[0];
13351                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13352                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13353                         break;
13354                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13355                         path.slots[0]++;
13356                         continue;
13357                 }
13358                 if (!del_nr) {
13359                         del_slot = path.slots[0];
13360                         del_nr = 1;
13361                 } else {
13362                         del_nr++;
13363                 }
13364                 path.slots[0]++;
13365         }
13366
13367         if (del_nr) {
13368                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13369                 if (ret)
13370                         goto out;
13371         }
13372         btrfs_release_path(&path);
13373
13374 reinit_data_reloc:
13375         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13376         key.type = BTRFS_ROOT_ITEM_KEY;
13377         key.offset = (u64)-1;
13378         root = btrfs_read_fs_root(fs_info, &key);
13379         if (IS_ERR(root)) {
13380                 fprintf(stderr, "Error reading data reloc tree\n");
13381                 ret = PTR_ERR(root);
13382                 goto out;
13383         }
13384         record_root_in_trans(trans, root);
13385         ret = btrfs_fsck_reinit_root(trans, root, 0);
13386         if (ret)
13387                 goto out;
13388         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13389 out:
13390         btrfs_release_path(&path);
13391         return ret;
13392 }
13393
13394 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13395                               struct btrfs_fs_info *fs_info)
13396 {
13397         u64 start = 0;
13398         int ret;
13399
13400         /*
13401          * The only reason we don't do this is because right now we're just
13402          * walking the trees we find and pinning down their bytes, we don't look
13403          * at any of the leaves.  In order to do mixed groups we'd have to check
13404          * the leaves of any fs roots and pin down the bytes for any file
13405          * extents we find.  Not hard but why do it if we don't have to?
13406          */
13407         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13408                 fprintf(stderr, "We don't support re-initing the extent tree "
13409                         "for mixed block groups yet, please notify a btrfs "
13410                         "developer you want to do this so they can add this "
13411                         "functionality.\n");
13412                 return -EINVAL;
13413         }
13414
13415         /*
13416          * first we need to walk all of the trees except the extent tree and pin
13417          * down the bytes that are in use so we don't overwrite any existing
13418          * metadata.
13419          */
13420         ret = pin_metadata_blocks(fs_info);
13421         if (ret) {
13422                 fprintf(stderr, "error pinning down used bytes\n");
13423                 return ret;
13424         }
13425
13426         /*
13427          * Need to drop all the block groups since we're going to recreate all
13428          * of them again.
13429          */
13430         btrfs_free_block_groups(fs_info);
13431         ret = reset_block_groups(fs_info);
13432         if (ret) {
13433                 fprintf(stderr, "error resetting the block groups\n");
13434                 return ret;
13435         }
13436
13437         /* Ok we can allocate now, reinit the extent root */
13438         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13439         if (ret) {
13440                 fprintf(stderr, "extent root initialization failed\n");
13441                 /*
13442                  * When the transaction code is updated we should end the
13443                  * transaction, but for now progs only knows about commit so
13444                  * just return an error.
13445                  */
13446                 return ret;
13447         }
13448
13449         /*
13450          * Now we have all the in-memory block groups setup so we can make
13451          * allocations properly, and the metadata we care about is safe since we
13452          * pinned all of it above.
13453          */
13454         while (1) {
13455                 struct btrfs_block_group_cache *cache;
13456
13457                 cache = btrfs_lookup_first_block_group(fs_info, start);
13458                 if (!cache)
13459                         break;
13460                 start = cache->key.objectid + cache->key.offset;
13461                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13462                                         &cache->key, &cache->item,
13463                                         sizeof(cache->item));
13464                 if (ret) {
13465                         fprintf(stderr, "Error adding block group\n");
13466                         return ret;
13467                 }
13468                 btrfs_extent_post_op(trans, fs_info->extent_root);
13469         }
13470
13471         ret = reset_balance(trans, fs_info);
13472         if (ret)
13473                 fprintf(stderr, "error resetting the pending balance\n");
13474
13475         return ret;
13476 }
13477
13478 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13479 {
13480         struct btrfs_path path;
13481         struct btrfs_trans_handle *trans;
13482         struct btrfs_key key;
13483         int ret;
13484
13485         printf("Recowing metadata block %llu\n", eb->start);
13486         key.objectid = btrfs_header_owner(eb);
13487         key.type = BTRFS_ROOT_ITEM_KEY;
13488         key.offset = (u64)-1;
13489
13490         root = btrfs_read_fs_root(root->fs_info, &key);
13491         if (IS_ERR(root)) {
13492                 fprintf(stderr, "Couldn't find owner root %llu\n",
13493                         key.objectid);
13494                 return PTR_ERR(root);
13495         }
13496
13497         trans = btrfs_start_transaction(root, 1);
13498         if (IS_ERR(trans))
13499                 return PTR_ERR(trans);
13500
13501         btrfs_init_path(&path);
13502         path.lowest_level = btrfs_header_level(eb);
13503         if (path.lowest_level)
13504                 btrfs_node_key_to_cpu(eb, &key, 0);
13505         else
13506                 btrfs_item_key_to_cpu(eb, &key, 0);
13507
13508         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13509         btrfs_commit_transaction(trans, root);
13510         btrfs_release_path(&path);
13511         return ret;
13512 }
13513
13514 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13515 {
13516         struct btrfs_path path;
13517         struct btrfs_trans_handle *trans;
13518         struct btrfs_key key;
13519         int ret;
13520
13521         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13522                bad->key.type, bad->key.offset);
13523         key.objectid = bad->root_id;
13524         key.type = BTRFS_ROOT_ITEM_KEY;
13525         key.offset = (u64)-1;
13526
13527         root = btrfs_read_fs_root(root->fs_info, &key);
13528         if (IS_ERR(root)) {
13529                 fprintf(stderr, "Couldn't find owner root %llu\n",
13530                         key.objectid);
13531                 return PTR_ERR(root);
13532         }
13533
13534         trans = btrfs_start_transaction(root, 1);
13535         if (IS_ERR(trans))
13536                 return PTR_ERR(trans);
13537
13538         btrfs_init_path(&path);
13539         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13540         if (ret) {
13541                 if (ret > 0)
13542                         ret = 0;
13543                 goto out;
13544         }
13545         ret = btrfs_del_item(trans, root, &path);
13546 out:
13547         btrfs_commit_transaction(trans, root);
13548         btrfs_release_path(&path);
13549         return ret;
13550 }
13551
13552 static int zero_log_tree(struct btrfs_root *root)
13553 {
13554         struct btrfs_trans_handle *trans;
13555         int ret;
13556
13557         trans = btrfs_start_transaction(root, 1);
13558         if (IS_ERR(trans)) {
13559                 ret = PTR_ERR(trans);
13560                 return ret;
13561         }
13562         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13563         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13564         ret = btrfs_commit_transaction(trans, root);
13565         return ret;
13566 }
13567
13568 static int populate_csum(struct btrfs_trans_handle *trans,
13569                          struct btrfs_root *csum_root, char *buf, u64 start,
13570                          u64 len)
13571 {
13572         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13573         u64 offset = 0;
13574         u64 sectorsize;
13575         int ret = 0;
13576
13577         while (offset < len) {
13578                 sectorsize = fs_info->sectorsize;
13579                 ret = read_extent_data(fs_info, buf, start + offset,
13580                                        &sectorsize, 0);
13581                 if (ret)
13582                         break;
13583                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13584                                             start + offset, buf, sectorsize);
13585                 if (ret)
13586                         break;
13587                 offset += sectorsize;
13588         }
13589         return ret;
13590 }
13591
13592 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13593                                       struct btrfs_root *csum_root,
13594                                       struct btrfs_root *cur_root)
13595 {
13596         struct btrfs_path path;
13597         struct btrfs_key key;
13598         struct extent_buffer *node;
13599         struct btrfs_file_extent_item *fi;
13600         char *buf = NULL;
13601         u64 start = 0;
13602         u64 len = 0;
13603         int slot = 0;
13604         int ret = 0;
13605
13606         buf = malloc(cur_root->fs_info->sectorsize);
13607         if (!buf)
13608                 return -ENOMEM;
13609
13610         btrfs_init_path(&path);
13611         key.objectid = 0;
13612         key.offset = 0;
13613         key.type = 0;
13614         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13615         if (ret < 0)
13616                 goto out;
13617         /* Iterate all regular file extents and fill its csum */
13618         while (1) {
13619                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13620
13621                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13622                         goto next;
13623                 node = path.nodes[0];
13624                 slot = path.slots[0];
13625                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13626                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13627                         goto next;
13628                 start = btrfs_file_extent_disk_bytenr(node, fi);
13629                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13630
13631                 ret = populate_csum(trans, csum_root, buf, start, len);
13632                 if (ret == -EEXIST)
13633                         ret = 0;
13634                 if (ret < 0)
13635                         goto out;
13636 next:
13637                 /*
13638                  * TODO: if next leaf is corrupted, jump to nearest next valid
13639                  * leaf.
13640                  */
13641                 ret = btrfs_next_item(cur_root, &path);
13642                 if (ret < 0)
13643                         goto out;
13644                 if (ret > 0) {
13645                         ret = 0;
13646                         goto out;
13647                 }
13648         }
13649
13650 out:
13651         btrfs_release_path(&path);
13652         free(buf);
13653         return ret;
13654 }
13655
13656 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13657                                   struct btrfs_root *csum_root)
13658 {
13659         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13660         struct btrfs_path path;
13661         struct btrfs_root *tree_root = fs_info->tree_root;
13662         struct btrfs_root *cur_root;
13663         struct extent_buffer *node;
13664         struct btrfs_key key;
13665         int slot = 0;
13666         int ret = 0;
13667
13668         btrfs_init_path(&path);
13669         key.objectid = BTRFS_FS_TREE_OBJECTID;
13670         key.offset = 0;
13671         key.type = BTRFS_ROOT_ITEM_KEY;
13672         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13673         if (ret < 0)
13674                 goto out;
13675         if (ret > 0) {
13676                 ret = -ENOENT;
13677                 goto out;
13678         }
13679
13680         while (1) {
13681                 node = path.nodes[0];
13682                 slot = path.slots[0];
13683                 btrfs_item_key_to_cpu(node, &key, slot);
13684                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13685                         goto out;
13686                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13687                         goto next;
13688                 if (!is_fstree(key.objectid))
13689                         goto next;
13690                 key.offset = (u64)-1;
13691
13692                 cur_root = btrfs_read_fs_root(fs_info, &key);
13693                 if (IS_ERR(cur_root) || !cur_root) {
13694                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13695                                 key.objectid);
13696                         goto out;
13697                 }
13698                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13699                                 cur_root);
13700                 if (ret < 0)
13701                         goto out;
13702 next:
13703                 ret = btrfs_next_item(tree_root, &path);
13704                 if (ret > 0) {
13705                         ret = 0;
13706                         goto out;
13707                 }
13708                 if (ret < 0)
13709                         goto out;
13710         }
13711
13712 out:
13713         btrfs_release_path(&path);
13714         return ret;
13715 }
13716
13717 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13718                                       struct btrfs_root *csum_root)
13719 {
13720         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13721         struct btrfs_path path;
13722         struct btrfs_extent_item *ei;
13723         struct extent_buffer *leaf;
13724         char *buf;
13725         struct btrfs_key key;
13726         int ret;
13727
13728         btrfs_init_path(&path);
13729         key.objectid = 0;
13730         key.type = BTRFS_EXTENT_ITEM_KEY;
13731         key.offset = 0;
13732         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13733         if (ret < 0) {
13734                 btrfs_release_path(&path);
13735                 return ret;
13736         }
13737
13738         buf = malloc(csum_root->fs_info->sectorsize);
13739         if (!buf) {
13740                 btrfs_release_path(&path);
13741                 return -ENOMEM;
13742         }
13743
13744         while (1) {
13745                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13746                         ret = btrfs_next_leaf(extent_root, &path);
13747                         if (ret < 0)
13748                                 break;
13749                         if (ret) {
13750                                 ret = 0;
13751                                 break;
13752                         }
13753                 }
13754                 leaf = path.nodes[0];
13755
13756                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13757                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13758                         path.slots[0]++;
13759                         continue;
13760                 }
13761
13762                 ei = btrfs_item_ptr(leaf, path.slots[0],
13763                                     struct btrfs_extent_item);
13764                 if (!(btrfs_extent_flags(leaf, ei) &
13765                       BTRFS_EXTENT_FLAG_DATA)) {
13766                         path.slots[0]++;
13767                         continue;
13768                 }
13769
13770                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13771                                     key.offset);
13772                 if (ret)
13773                         break;
13774                 path.slots[0]++;
13775         }
13776
13777         btrfs_release_path(&path);
13778         free(buf);
13779         return ret;
13780 }
13781
/*
 * Recompute data checksums and store them in the csum tree.
 *
 * An extent tree init wipes all extent info, so in that case the extent tree
 * cannot be used as the source of data extents.  When @search_fs_tree is
 * non-zero the fs/subvolume trees are scanned instead; otherwise the extent
 * tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
			  struct btrfs_root *csum_root,
			  int search_fs_tree)
{
	if (!search_fs_tree)
		return fill_csum_tree_from_extent(trans, csum_root);

	return fill_csum_tree_from_fs(trans, csum_root);
}
13798
13799 static void free_roots_info_cache(void)
13800 {
13801         if (!roots_info_cache)
13802                 return;
13803
13804         while (!cache_tree_empty(roots_info_cache)) {
13805                 struct cache_extent *entry;
13806                 struct root_item_info *rii;
13807
13808                 entry = first_cache_extent(roots_info_cache);
13809                 if (!entry)
13810                         break;
13811                 remove_cache_extent(roots_info_cache, entry);
13812                 rii = container_of(entry, struct root_item_info, cache_extent);
13813                 free(rii);
13814         }
13815
13816         free(roots_info_cache);
13817         roots_info_cache = NULL;
13818 }
13819
13820 static int build_roots_info_cache(struct btrfs_fs_info *info)
13821 {
13822         int ret = 0;
13823         struct btrfs_key key;
13824         struct extent_buffer *leaf;
13825         struct btrfs_path path;
13826
13827         if (!roots_info_cache) {
13828                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13829                 if (!roots_info_cache)
13830                         return -ENOMEM;
13831                 cache_tree_init(roots_info_cache);
13832         }
13833
13834         btrfs_init_path(&path);
13835         key.objectid = 0;
13836         key.type = BTRFS_EXTENT_ITEM_KEY;
13837         key.offset = 0;
13838         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13839         if (ret < 0)
13840                 goto out;
13841         leaf = path.nodes[0];
13842
13843         while (1) {
13844                 struct btrfs_key found_key;
13845                 struct btrfs_extent_item *ei;
13846                 struct btrfs_extent_inline_ref *iref;
13847                 int slot = path.slots[0];
13848                 int type;
13849                 u64 flags;
13850                 u64 root_id;
13851                 u8 level;
13852                 struct cache_extent *entry;
13853                 struct root_item_info *rii;
13854
13855                 if (slot >= btrfs_header_nritems(leaf)) {
13856                         ret = btrfs_next_leaf(info->extent_root, &path);
13857                         if (ret < 0) {
13858                                 break;
13859                         } else if (ret) {
13860                                 ret = 0;
13861                                 break;
13862                         }
13863                         leaf = path.nodes[0];
13864                         slot = path.slots[0];
13865                 }
13866
13867                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13868
13869                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13870                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13871                         goto next;
13872
13873                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13874                 flags = btrfs_extent_flags(leaf, ei);
13875
13876                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13877                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13878                         goto next;
13879
13880                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13881                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13882                         level = found_key.offset;
13883                 } else {
13884                         struct btrfs_tree_block_info *binfo;
13885
13886                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13887                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13888                         level = btrfs_tree_block_level(leaf, binfo);
13889                 }
13890
13891                 /*
13892                  * For a root extent, it must be of the following type and the
13893                  * first (and only one) iref in the item.
13894                  */
13895                 type = btrfs_extent_inline_ref_type(leaf, iref);
13896                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13897                         goto next;
13898
13899                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13900                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13901                 if (!entry) {
13902                         rii = malloc(sizeof(struct root_item_info));
13903                         if (!rii) {
13904                                 ret = -ENOMEM;
13905                                 goto out;
13906                         }
13907                         rii->cache_extent.start = root_id;
13908                         rii->cache_extent.size = 1;
13909                         rii->level = (u8)-1;
13910                         entry = &rii->cache_extent;
13911                         ret = insert_cache_extent(roots_info_cache, entry);
13912                         ASSERT(ret == 0);
13913                 } else {
13914                         rii = container_of(entry, struct root_item_info,
13915                                            cache_extent);
13916                 }
13917
13918                 ASSERT(rii->cache_extent.start == root_id);
13919                 ASSERT(rii->cache_extent.size == 1);
13920
13921                 if (level > rii->level || rii->level == (u8)-1) {
13922                         rii->level = level;
13923                         rii->bytenr = found_key.objectid;
13924                         rii->gen = btrfs_extent_generation(leaf, ei);
13925                         rii->node_count = 1;
13926                 } else if (level == rii->level) {
13927                         rii->node_count++;
13928                 }
13929 next:
13930                 path.slots[0]++;
13931         }
13932
13933 out:
13934         btrfs_release_path(&path);
13935
13936         return ret;
13937 }
13938
13939 static int maybe_repair_root_item(struct btrfs_path *path,
13940                                   const struct btrfs_key *root_key,
13941                                   const int read_only_mode)
13942 {
13943         const u64 root_id = root_key->objectid;
13944         struct cache_extent *entry;
13945         struct root_item_info *rii;
13946         struct btrfs_root_item ri;
13947         unsigned long offset;
13948
13949         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13950         if (!entry) {
13951                 fprintf(stderr,
13952                         "Error: could not find extent items for root %llu\n",
13953                         root_key->objectid);
13954                 return -ENOENT;
13955         }
13956
13957         rii = container_of(entry, struct root_item_info, cache_extent);
13958         ASSERT(rii->cache_extent.start == root_id);
13959         ASSERT(rii->cache_extent.size == 1);
13960
13961         if (rii->node_count != 1) {
13962                 fprintf(stderr,
13963                         "Error: could not find btree root extent for root %llu\n",
13964                         root_id);
13965                 return -ENOENT;
13966         }
13967
13968         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13969         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13970
13971         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13972             btrfs_root_level(&ri) != rii->level ||
13973             btrfs_root_generation(&ri) != rii->gen) {
13974
13975                 /*
13976                  * If we're in repair mode but our caller told us to not update
13977                  * the root item, i.e. just check if it needs to be updated, don't
13978                  * print this message, since the caller will call us again shortly
13979                  * for the same root item without read only mode (the caller will
13980                  * open a transaction first).
13981                  */
13982                 if (!(read_only_mode && repair))
13983                         fprintf(stderr,
13984                                 "%sroot item for root %llu,"
13985                                 " current bytenr %llu, current gen %llu, current level %u,"
13986                                 " new bytenr %llu, new gen %llu, new level %u\n",
13987                                 (read_only_mode ? "" : "fixing "),
13988                                 root_id,
13989                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13990                                 btrfs_root_level(&ri),
13991                                 rii->bytenr, rii->gen, rii->level);
13992
13993                 if (btrfs_root_generation(&ri) > rii->gen) {
13994                         fprintf(stderr,
13995                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13996                                 root_id, btrfs_root_generation(&ri), rii->gen);
13997                         return -EINVAL;
13998                 }
13999
14000                 if (!read_only_mode) {
14001                         btrfs_set_root_bytenr(&ri, rii->bytenr);
14002                         btrfs_set_root_level(&ri, rii->level);
14003                         btrfs_set_root_generation(&ri, rii->gen);
14004                         write_extent_buffer(path->nodes[0], &ri,
14005                                             offset, sizeof(ri));
14006                 }
14007
14008                 return 1;
14009         }
14010
14011         return 0;
14012 }
14013
14014 /*
14015  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14016  * caused read-only snapshots to be corrupted if they were created at a moment
14017  * when the source subvolume/snapshot had orphan items. The issue was that the
14018  * on-disk root items became incorrect, referring to the pre orphan cleanup root
14019  * node instead of the post orphan cleanup root node.
14020  * So this function, and its callees, just detects and fixes those cases. Even
14021  * though the regression was for read-only snapshots, this function applies to
14022  * any snapshot/subvolume root.
14023  * This must be run before any other repair code - not doing it so, makes other
14024  * repair code delete or modify backrefs in the extent tree for example, which
14025  * will result in an inconsistent fs after repairing the root items.
14026  */
14027 static int repair_root_items(struct btrfs_fs_info *info)
14028 {
14029         struct btrfs_path path;
14030         struct btrfs_key key;
14031         struct extent_buffer *leaf;
14032         struct btrfs_trans_handle *trans = NULL;
14033         int ret = 0;
14034         int bad_roots = 0;
14035         int need_trans = 0;
14036
14037         btrfs_init_path(&path);
14038
14039         ret = build_roots_info_cache(info);
14040         if (ret)
14041                 goto out;
14042
14043         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14044         key.type = BTRFS_ROOT_ITEM_KEY;
14045         key.offset = 0;
14046
14047 again:
14048         /*
14049          * Avoid opening and committing transactions if a leaf doesn't have
14050          * any root items that need to be fixed, so that we avoid rotating
14051          * backup roots unnecessarily.
14052          */
14053         if (need_trans) {
14054                 trans = btrfs_start_transaction(info->tree_root, 1);
14055                 if (IS_ERR(trans)) {
14056                         ret = PTR_ERR(trans);
14057                         goto out;
14058                 }
14059         }
14060
14061         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14062                                 0, trans ? 1 : 0);
14063         if (ret < 0)
14064                 goto out;
14065         leaf = path.nodes[0];
14066
14067         while (1) {
14068                 struct btrfs_key found_key;
14069
14070                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14071                         int no_more_keys = find_next_key(&path, &key);
14072
14073                         btrfs_release_path(&path);
14074                         if (trans) {
14075                                 ret = btrfs_commit_transaction(trans,
14076                                                                info->tree_root);
14077                                 trans = NULL;
14078                                 if (ret < 0)
14079                                         goto out;
14080                         }
14081                         need_trans = 0;
14082                         if (no_more_keys)
14083                                 break;
14084                         goto again;
14085                 }
14086
14087                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14088
14089                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14090                         goto next;
14091                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14092                         goto next;
14093
14094                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14095                 if (ret < 0)
14096                         goto out;
14097                 if (ret) {
14098                         if (!trans && repair) {
14099                                 need_trans = 1;
14100                                 key = found_key;
14101                                 btrfs_release_path(&path);
14102                                 goto again;
14103                         }
14104                         bad_roots++;
14105                 }
14106 next:
14107                 path.slots[0]++;
14108         }
14109         ret = 0;
14110 out:
14111         free_roots_info_cache();
14112         btrfs_release_path(&path);
14113         if (trans)
14114                 btrfs_commit_transaction(trans, info->tree_root);
14115         if (ret < 0)
14116                 return ret;
14117
14118         return bad_roots;
14119 }
14120
14121 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14122 {
14123         struct btrfs_trans_handle *trans;
14124         struct btrfs_block_group_cache *bg_cache;
14125         u64 current = 0;
14126         int ret = 0;
14127
14128         /* Clear all free space cache inodes and its extent data */
14129         while (1) {
14130                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14131                 if (!bg_cache)
14132                         break;
14133                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14134                 if (ret < 0)
14135                         return ret;
14136                 current = bg_cache->key.objectid + bg_cache->key.offset;
14137         }
14138
14139         /* Don't forget to set cache_generation to -1 */
14140         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14141         if (IS_ERR(trans)) {
14142                 error("failed to update super block cache generation");
14143                 return PTR_ERR(trans);
14144         }
14145         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14146         btrfs_commit_transaction(trans, fs_info->tree_root);
14147
14148         return ret;
14149 }
14150
14151 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14152                 int clear_version)
14153 {
14154         int ret = 0;
14155
14156         if (clear_version == 1) {
14157                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14158                         error(
14159                 "free space cache v2 detected, use --clear-space-cache v2");
14160                         ret = 1;
14161                         goto close_out;
14162                 }
14163                 printf("Clearing free space cache\n");
14164                 ret = clear_free_space_cache(fs_info);
14165                 if (ret) {
14166                         error("failed to clear free space cache");
14167                         ret = 1;
14168                 } else {
14169                         printf("Free space cache cleared\n");
14170                 }
14171         } else if (clear_version == 2) {
14172                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14173                         printf("no free space cache v2 to clear\n");
14174                         ret = 0;
14175                         goto close_out;
14176                 }
14177                 printf("Clear free space cache v2\n");
14178                 ret = btrfs_clear_free_space_tree(fs_info);
14179                 if (ret) {
14180                         error("failed to clear free space cache v2: %d", ret);
14181                         ret = 1;
14182                 } else {
14183                         printf("free space cache v2 cleared\n");
14184                 }
14185         }
14186 close_out:
14187         return ret;
14188 }
14189
/*
 * Help text for "btrfs check", one output line per array entry, terminated
 * by NULL.  It is passed to usage() by cmd_check().
 */
const char * const cmd_check_usage[] = {
	/* Synopsis and short summary */
	"btrfs check [options] <device>",
	"Check structural integrity of a filesystem (unmounted).",
	/* Long description */
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	/* Options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--force                     skip mount checks, repair is not possible",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
14221
14222 int cmd_check(int argc, char **argv)
14223 {
14224         struct cache_tree root_cache;
14225         struct btrfs_root *root;
14226         struct btrfs_fs_info *info;
14227         u64 bytenr = 0;
14228         u64 subvolid = 0;
14229         u64 tree_root_bytenr = 0;
14230         u64 chunk_root_bytenr = 0;
14231         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14232         int ret = 0;
14233         int err = 0;
14234         u64 num;
14235         int init_csum_tree = 0;
14236         int readonly = 0;
14237         int clear_space_cache = 0;
14238         int qgroup_report = 0;
14239         int qgroups_repaired = 0;
14240         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14241         int force = 0;
14242
14243         while(1) {
14244                 int c;
14245                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14246                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14247                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14248                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14249                         GETOPT_VAL_FORCE };
14250                 static const struct option long_options[] = {
14251                         { "super", required_argument, NULL, 's' },
14252                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14253                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14254                         { "init-csum-tree", no_argument, NULL,
14255                                 GETOPT_VAL_INIT_CSUM },
14256                         { "init-extent-tree", no_argument, NULL,
14257                                 GETOPT_VAL_INIT_EXTENT },
14258                         { "check-data-csum", no_argument, NULL,
14259                                 GETOPT_VAL_CHECK_CSUM },
14260                         { "backup", no_argument, NULL, 'b' },
14261                         { "subvol-extents", required_argument, NULL, 'E' },
14262                         { "qgroup-report", no_argument, NULL, 'Q' },
14263                         { "tree-root", required_argument, NULL, 'r' },
14264                         { "chunk-root", required_argument, NULL,
14265                                 GETOPT_VAL_CHUNK_TREE },
14266                         { "progress", no_argument, NULL, 'p' },
14267                         { "mode", required_argument, NULL,
14268                                 GETOPT_VAL_MODE },
14269                         { "clear-space-cache", required_argument, NULL,
14270                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14271                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14272                         { NULL, 0, NULL, 0}
14273                 };
14274
14275                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14276                 if (c < 0)
14277                         break;
14278                 switch(c) {
14279                         case 'a': /* ignored */ break;
14280                         case 'b':
14281                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14282                                 break;
14283                         case 's':
14284                                 num = arg_strtou64(optarg);
14285                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14286                                         error(
14287                                         "super mirror should be less than %d",
14288                                                 BTRFS_SUPER_MIRROR_MAX);
14289                                         exit(1);
14290                                 }
14291                                 bytenr = btrfs_sb_offset(((int)num));
14292                                 printf("using SB copy %llu, bytenr %llu\n", num,
14293                                        (unsigned long long)bytenr);
14294                                 break;
14295                         case 'Q':
14296                                 qgroup_report = 1;
14297                                 break;
14298                         case 'E':
14299                                 subvolid = arg_strtou64(optarg);
14300                                 break;
14301                         case 'r':
14302                                 tree_root_bytenr = arg_strtou64(optarg);
14303                                 break;
14304                         case GETOPT_VAL_CHUNK_TREE:
14305                                 chunk_root_bytenr = arg_strtou64(optarg);
14306                                 break;
14307                         case 'p':
14308                                 ctx.progress_enabled = true;
14309                                 break;
14310                         case '?':
14311                         case 'h':
14312                                 usage(cmd_check_usage);
14313                         case GETOPT_VAL_REPAIR:
14314                                 printf("enabling repair mode\n");
14315                                 repair = 1;
14316                                 ctree_flags |= OPEN_CTREE_WRITES;
14317                                 break;
14318                         case GETOPT_VAL_READONLY:
14319                                 readonly = 1;
14320                                 break;
14321                         case GETOPT_VAL_INIT_CSUM:
14322                                 printf("Creating a new CRC tree\n");
14323                                 init_csum_tree = 1;
14324                                 repair = 1;
14325                                 ctree_flags |= OPEN_CTREE_WRITES;
14326                                 break;
14327                         case GETOPT_VAL_INIT_EXTENT:
14328                                 init_extent_tree = 1;
14329                                 ctree_flags |= (OPEN_CTREE_WRITES |
14330                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14331                                 repair = 1;
14332                                 break;
14333                         case GETOPT_VAL_CHECK_CSUM:
14334                                 check_data_csum = 1;
14335                                 break;
14336                         case GETOPT_VAL_MODE:
14337                                 check_mode = parse_check_mode(optarg);
14338                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14339                                         error("unknown mode: %s", optarg);
14340                                         exit(1);
14341                                 }
14342                                 break;
14343                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14344                                 if (strcmp(optarg, "v1") == 0) {
14345                                         clear_space_cache = 1;
14346                                 } else if (strcmp(optarg, "v2") == 0) {
14347                                         clear_space_cache = 2;
14348                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14349                                 } else {
14350                                         error(
14351                 "invalid argument to --clear-space-cache, must be v1 or v2");
14352                                         exit(1);
14353                                 }
14354                                 ctree_flags |= OPEN_CTREE_WRITES;
14355                                 break;
14356                         case GETOPT_VAL_FORCE:
14357                                 force = 1;
14358                                 break;
14359                 }
14360         }
14361
14362         if (check_argc_exact(argc - optind, 1))
14363                 usage(cmd_check_usage);
14364
14365         if (ctx.progress_enabled) {
14366                 ctx.tp = TASK_NOTHING;
14367                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14368         }
14369
14370         /* This check is the only reason for --readonly to exist */
14371         if (readonly && repair) {
14372                 error("repair options are not compatible with --readonly");
14373                 exit(1);
14374         }
14375
14376         /*
14377          * experimental and dangerous
14378          */
14379         if (repair && check_mode == CHECK_MODE_LOWMEM)
14380                 warning("low-memory mode repair support is only partial");
14381
14382         radix_tree_init();
14383         cache_tree_init(&root_cache);
14384
14385         ret = check_mounted(argv[optind]);
14386         if (!force) {
14387                 if (ret < 0) {
14388                         error("could not check mount status: %s",
14389                                         strerror(-ret));
14390                         err |= !!ret;
14391                         goto err_out;
14392                 } else if (ret) {
14393                         error(
14394 "%s is currently mounted, use --force if you really intend to check the filesystem",
14395                                 argv[optind]);
14396                         ret = -EBUSY;
14397                         err |= !!ret;
14398                         goto err_out;
14399                 }
14400         } else {
14401                 if (repair) {
14402                         error("repair and --force is not yet supported");
14403                         ret = 1;
14404                         err |= !!ret;
14405                         goto err_out;
14406                 }
14407                 if (ret < 0) {
14408                         warning(
14409 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14410                                 argv[optind]);
14411                 } else if (ret) {
14412                         warning(
14413                         "filesystem mounted, continuing because of --force");
14414                 }
14415                 /* A block device is mounted in exclusive mode by the kernel */
14416                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14417         }
14418
14419         /* only allow partial opening under repair mode */
14420         if (repair)
14421                 ctree_flags |= OPEN_CTREE_PARTIAL;
14422
14423         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14424                                   chunk_root_bytenr, ctree_flags);
14425         if (!info) {
14426                 error("cannot open file system");
14427                 ret = -EIO;
14428                 err |= !!ret;
14429                 goto err_out;
14430         }
14431
14432         global_info = info;
14433         root = info->fs_root;
14434         uuid_unparse(info->super_copy->fsid, uuidbuf);
14435
14436         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14437
14438         /*
14439          * Check the bare minimum before starting anything else that could rely
14440          * on it, namely the tree roots and any local consistency checks.
14441          */
14442         if (!extent_buffer_uptodate(info->tree_root->node) ||
14443             !extent_buffer_uptodate(info->dev_root->node) ||
14444             !extent_buffer_uptodate(info->chunk_root->node)) {
14445                 error("critical roots corrupted, unable to check the filesystem");
14446                 err |= !!ret;
14447                 ret = -EIO;
14448                 goto close_out;
14449         }
14450
14451         if (clear_space_cache) {
14452                 ret = do_clear_free_space_cache(info, clear_space_cache);
14453                 err |= !!ret;
14454                 goto close_out;
14455         }
14456
14457         /*
14458          * Repair mode will force us to commit a transaction, which
14459          * will make us fail to load the log tree when mounting.
14460          */
14461         if (repair && btrfs_super_log_root(info->super_copy)) {
14462                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14463                 if (!ret) {
14464                         ret = 1;
14465                         err |= !!ret;
14466                         goto close_out;
14467                 }
14468                 ret = zero_log_tree(root);
14469                 err |= !!ret;
14470                 if (ret) {
14471                         error("failed to zero log tree: %d", ret);
14472                         goto close_out;
14473                 }
14474         }
14475
14476         if (qgroup_report) {
14477                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14478                        uuidbuf);
14479                 ret = qgroup_verify_all(info);
14480                 err |= !!ret;
14481                 if (ret == 0)
14482                         report_qgroups(1);
14483                 goto close_out;
14484         }
14485         if (subvolid) {
14486                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14487                        subvolid, argv[optind], uuidbuf);
14488                 ret = print_extent_state(info, subvolid);
14489                 err |= !!ret;
14490                 goto close_out;
14491         }
14492
14493         if (init_extent_tree || init_csum_tree) {
14494                 struct btrfs_trans_handle *trans;
14495
14496                 trans = btrfs_start_transaction(info->extent_root, 0);
14497                 if (IS_ERR(trans)) {
14498                         error("error starting transaction");
14499                         ret = PTR_ERR(trans);
14500                         err |= !!ret;
14501                         goto close_out;
14502                 }
14503
14504                 if (init_extent_tree) {
14505                         printf("Creating a new extent tree\n");
14506                         ret = reinit_extent_tree(trans, info);
14507                         err |= !!ret;
14508                         if (ret)
14509                                 goto close_out;
14510                 }
14511
14512                 if (init_csum_tree) {
14513                         printf("Reinitialize checksum tree\n");
14514                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14515                         if (ret) {
14516                                 error("checksum tree initialization failed: %d",
14517                                                 ret);
14518                                 ret = -EIO;
14519                                 err |= !!ret;
14520                                 goto close_out;
14521                         }
14522
14523                         ret = fill_csum_tree(trans, info->csum_root,
14524                                              init_extent_tree);
14525                         err |= !!ret;
14526                         if (ret) {
14527                                 error("checksum tree refilling failed: %d", ret);
14528                                 return -EIO;
14529                         }
14530                 }
14531                 /*
14532                  * Ok now we commit and run the normal fsck, which will add
14533                  * extent entries for all of the items it finds.
14534                  */
14535                 ret = btrfs_commit_transaction(trans, info->extent_root);
14536                 err |= !!ret;
14537                 if (ret)
14538                         goto close_out;
14539         }
14540         if (!extent_buffer_uptodate(info->extent_root->node)) {
14541                 error("critical: extent_root, unable to check the filesystem");
14542                 ret = -EIO;
14543                 err |= !!ret;
14544                 goto close_out;
14545         }
14546         if (!extent_buffer_uptodate(info->csum_root->node)) {
14547                 error("critical: csum_root, unable to check the filesystem");
14548                 ret = -EIO;
14549                 err |= !!ret;
14550                 goto close_out;
14551         }
14552
14553         if (!init_extent_tree) {
14554                 ret = repair_root_items(info);
14555                 if (ret < 0) {
14556                         err = !!ret;
14557                         error("failed to repair root items: %s", strerror(-ret));
14558                         goto close_out;
14559                 }
14560                 if (repair) {
14561                         fprintf(stderr, "Fixed %d roots.\n", ret);
14562                         ret = 0;
14563                 } else if (ret > 0) {
14564                         fprintf(stderr,
14565                                 "Found %d roots with an outdated root item.\n",
14566                                 ret);
14567                         fprintf(stderr,
14568         "Please run a filesystem check with the option --repair to fix them.\n");
14569                         ret = 1;
14570                         err |= ret;
14571                         goto close_out;
14572                 }
14573         }
14574
14575         ret = do_check_chunks_and_extents(info);
14576         err |= !!ret;
14577         if (ret)
14578                 error(
14579                 "errors found in extent allocation tree or chunk allocation");
14580
14581         /* Only re-check super size after we checked and repaired the fs */
14582         err |= !is_super_size_valid(info);
14583
14584         if (!ctx.progress_enabled) {
14585                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14586                         fprintf(stderr, "checking free space tree\n");
14587                 else
14588                         fprintf(stderr, "checking free space cache\n");
14589         }
14590         ret = check_space_cache(root);
14591         err |= !!ret;
14592         if (ret) {
14593                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14594                         error("errors found in free space tree");
14595                 else
14596                         error("errors found in free space cache");
14597                 goto out;
14598         }
14599
14600         /*
14601          * Hole extents used to be mandatory in between our real extents, so
14602          * if the NO_HOLES flag is not set we need to make sure there are no
14603          * gaps in the file extents for inodes; otherwise we can just ignore
14604          * such gaps when they happen.
14605          */
14606         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14607         ret = do_check_fs_roots(info, &root_cache);
14608         err |= !!ret;
14609         if (ret) {
14610                 error("errors found in fs roots");
14611                 goto out;
14612         }
14613
14614         fprintf(stderr, "checking csums\n");
14615         ret = check_csums(root);
14616         err |= !!ret;
14617         if (ret) {
14618                 error("errors found in csum tree");
14619                 goto out;
14620         }
14621
14622         fprintf(stderr, "checking root refs\n");
14623         /* For low memory mode, check_fs_roots_v2 handles root refs */
14624         if (check_mode != CHECK_MODE_LOWMEM) {
14625                 ret = check_root_refs(root, &root_cache);
14626                 err |= !!ret;
14627                 if (ret) {
14628                         error("errors found in root refs");
14629                         goto out;
14630                 }
14631         }
14632
14633         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14634                 struct extent_buffer *eb;
14635
14636                 eb = list_first_entry(&root->fs_info->recow_ebs,
14637                                       struct extent_buffer, recow);
14638                 list_del_init(&eb->recow);
14639                 ret = recow_extent_buffer(root, eb);
14640                 err |= !!ret;
14641                 if (ret) {
14642                         error("fails to fix transid errors");
14643                         break;
14644                 }
14645         }
14646
14647         while (!list_empty(&delete_items)) {
14648                 struct bad_item *bad;
14649
14650                 bad = list_first_entry(&delete_items, struct bad_item, list);
14651                 list_del_init(&bad->list);
14652                 if (repair) {
14653                         ret = delete_bad_item(root, bad);
14654                         err |= !!ret;
14655                 }
14656                 free(bad);
14657         }
14658
14659         if (info->quota_enabled) {
14660                 fprintf(stderr, "checking quota groups\n");
14661                 ret = qgroup_verify_all(info);
14662                 err |= !!ret;
14663                 if (ret) {
14664                         error("failed to check quota groups");
14665                         goto out;
14666                 }
14667                 report_qgroups(0);
14668                 ret = repair_qgroups(info, &qgroups_repaired);
14669                 err |= !!ret;
14670                 if (err) {
14671                         error("failed to repair quota groups");
14672                         goto out;
14673                 }
14674                 ret = 0;
14675         }
14676
14677         if (!list_empty(&root->fs_info->recow_ebs)) {
14678                 error("transid errors in file system");
14679                 ret = 1;
14680                 err |= !!ret;
14681         }
14682 out:
14683         printf("found %llu bytes used, ",
14684                (unsigned long long)bytes_used);
14685         if (err)
14686                 printf("error(s) found\n");
14687         else
14688                 printf("no error found\n");
14689         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14690         printf("total tree bytes: %llu\n",
14691                (unsigned long long)total_btree_bytes);
14692         printf("total fs tree bytes: %llu\n",
14693                (unsigned long long)total_fs_tree_bytes);
14694         printf("total extent tree bytes: %llu\n",
14695                (unsigned long long)total_extent_tree_bytes);
14696         printf("btree space waste bytes: %llu\n",
14697                (unsigned long long)btree_space_waste);
14698         printf("file data blocks allocated: %llu\n referenced %llu\n",
14699                 (unsigned long long)data_bytes_allocated,
14700                 (unsigned long long)data_bytes_referenced);
14701
14702         free_qgroup_counts();
14703         free_root_recs_tree(&root_cache);
14704 close_out:
14705         close_ctree(root);
14706 err_out:
14707         if (ctx.progress_enabled)
14708                 task_deinit(ctx.info);
14709
14710         return err;
14711 }