btrfs-progs: check: Move __create_inode_item function to check/common.c
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
49
/*
 * Phases of the check that the progress indicator can report.
 *
 * print_status_check() uses the value as an index into its label table and
 * prints nothing when the position is TASK_NOTHING.
 */
enum task_position {
        TASK_EXTENTS,
        TASK_FREE_SPACE,
        TASK_FS_ROOTS,
        TASK_NOTHING, /* have to be the last element */
};
56
/* Context consumed by the progress callbacks (see print_status_check()). */
struct task_ctx {
        /* NOTE(review): presumably non-zero when progress output is wanted -- confirm at the users. */
        int progress_enabled;
        /* Phase whose label print_status_check() prints. */
        enum task_position tp;

        /* Handed to task_period_start() / task_period_wait(). */
        struct task_info *info;
};
63
/*
 * File-scope state of the checker.  None of these are declared static, so
 * they have external linkage.
 *
 * NOTE(review): the counters below are presumably accumulated while the
 * trees are walked; their updates are not in this part of the file.
 */
u64 bytes_used = 0;
u64 total_csum_bytes = 0;
u64 total_btree_bytes = 0;
u64 total_fs_tree_bytes = 0;
u64 total_extent_tree_bytes = 0;
u64 btree_space_waste = 0;
u64 data_bytes_allocated = 0;
u64 data_bytes_referenced = 0;
LIST_HEAD(duplicate_extents);
LIST_HEAD(delete_items);
/* When non-zero, maybe_free_inode_rec() skips the file extent gap check. */
int no_holes = 0;
int init_extent_tree = 0;
int check_data_csum = 0;
struct btrfs_fs_info *global_info;
/* Progress-reporting context, zero-initialized. */
struct task_ctx ctx = { 0 };
struct cache_tree *roots_info_cache = NULL;
80
/*
 * Check implementations that can be selected by name.
 *
 * parse_check_mode() maps a string to one of these and returns
 * CHECK_MODE_UNKNOWN for anything it does not recognize.
 * CHECK_MODE_DEFAULT is an alias for CHECK_MODE_ORIGINAL.
 */
enum btrfs_check_mode {
        CHECK_MODE_ORIGINAL,
        CHECK_MODE_LOWMEM,
        CHECK_MODE_UNKNOWN,
        CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};
87
/* Currently selected check implementation; starts as CHECK_MODE_DEFAULT. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Progress callback: emits a newline on stdout and flushes it.
 *
 * @p is unused.  Always returns 0.
 */
static int print_status_return(void *p)
{
        putchar('\n');
        fflush(stdout);

        return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
/* Forward declaration so code placed before the definition can call it. */
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
416
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418                                  struct btrfs_root *root)
419 {
420         if (root->last_trans != trans->transid) {
421                 root->track_dirty = 1;
422                 root->last_trans = trans->transid;
423                 root->commit_root = root->node;
424                 extent_buffer_get(root->node);
425         }
426 }
427
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
429 {
430         struct device_record *rec1;
431         struct device_record *rec2;
432
433         rec1 = rb_entry(node1, struct device_record, node);
434         rec2 = rb_entry(node2, struct device_record, node);
435         if (rec1->devid > rec2->devid)
436                 return -1;
437         else if (rec1->devid < rec2->devid)
438                 return 1;
439         else
440                 return 0;
441 }
442
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
444 {
445         struct inode_record *rec;
446         struct inode_backref *backref;
447         struct inode_backref *orig;
448         struct inode_backref *tmp;
449         struct orphan_data_extent *src_orphan;
450         struct orphan_data_extent *dst_orphan;
451         struct rb_node *rb;
452         size_t size;
453         int ret;
454
455         rec = malloc(sizeof(*rec));
456         if (!rec)
457                 return ERR_PTR(-ENOMEM);
458         memcpy(rec, orig_rec, sizeof(*rec));
459         rec->refs = 1;
460         INIT_LIST_HEAD(&rec->backrefs);
461         INIT_LIST_HEAD(&rec->orphan_extents);
462         rec->holes = RB_ROOT;
463
464         list_for_each_entry(orig, &orig_rec->backrefs, list) {
465                 size = sizeof(*orig) + orig->namelen + 1;
466                 backref = malloc(size);
467                 if (!backref) {
468                         ret = -ENOMEM;
469                         goto cleanup;
470                 }
471                 memcpy(backref, orig, size);
472                 list_add_tail(&backref->list, &rec->backrefs);
473         }
474         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475                 dst_orphan = malloc(sizeof(*dst_orphan));
476                 if (!dst_orphan) {
477                         ret = -ENOMEM;
478                         goto cleanup;
479                 }
480                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
482         }
483         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
484         if (ret < 0)
485                 goto cleanup_rb;
486
487         return rec;
488
489 cleanup_rb:
490         rb = rb_first(&rec->holes);
491         while (rb) {
492                 struct file_extent_hole *hole;
493
494                 hole = rb_entry(rb, struct file_extent_hole, node);
495                 rb = rb_next(rb);
496                 free(hole);
497         }
498
499 cleanup:
500         if (!list_empty(&rec->backrefs))
501                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502                         list_del(&orig->list);
503                         free(orig);
504                 }
505
506         if (!list_empty(&rec->orphan_extents))
507                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508                         list_del(&orig->list);
509                         free(orig);
510                 }
511
512         free(rec);
513
514         return ERR_PTR(ret);
515 }
516
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
518                                       u64 objectid)
519 {
520         struct orphan_data_extent *orphan;
521
522         if (list_empty(orphan_extents))
523                 return;
524         printf("The following data extent is lost in tree %llu:\n",
525                objectid);
526         list_for_each_entry(orphan, orphan_extents, list) {
527                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
529                        orphan->disk_len);
530         }
531 }
532
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
534 {
535         u64 root_objectid = root->root_key.objectid;
536         int errors = rec->errors;
537
538         if (!errors)
539                 return;
540         /* reloc root errors, we print its corresponding fs root objectid*/
541         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542                 root_objectid = root->root_key.offset;
543                 fprintf(stderr, "reloc");
544         }
545         fprintf(stderr, "root %llu inode %llu errors %x",
546                 (unsigned long long) root_objectid,
547                 (unsigned long long) rec->ino, rec->errors);
548
549         if (errors & I_ERR_NO_INODE_ITEM)
550                 fprintf(stderr, ", no inode item");
551         if (errors & I_ERR_NO_ORPHAN_ITEM)
552                 fprintf(stderr, ", no orphan item");
553         if (errors & I_ERR_DUP_INODE_ITEM)
554                 fprintf(stderr, ", dup inode item");
555         if (errors & I_ERR_DUP_DIR_INDEX)
556                 fprintf(stderr, ", dup dir index");
557         if (errors & I_ERR_ODD_DIR_ITEM)
558                 fprintf(stderr, ", odd dir item");
559         if (errors & I_ERR_ODD_FILE_EXTENT)
560                 fprintf(stderr, ", odd file extent");
561         if (errors & I_ERR_BAD_FILE_EXTENT)
562                 fprintf(stderr, ", bad file extent");
563         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564                 fprintf(stderr, ", file extent overlap");
565         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566                 fprintf(stderr, ", file extent discount");
567         if (errors & I_ERR_DIR_ISIZE_WRONG)
568                 fprintf(stderr, ", dir isize wrong");
569         if (errors & I_ERR_FILE_NBYTES_WRONG)
570                 fprintf(stderr, ", nbytes wrong");
571         if (errors & I_ERR_ODD_CSUM_ITEM)
572                 fprintf(stderr, ", odd csum item");
573         if (errors & I_ERR_SOME_CSUM_MISSING)
574                 fprintf(stderr, ", some csum missing");
575         if (errors & I_ERR_LINK_COUNT_WRONG)
576                 fprintf(stderr, ", link count wrong");
577         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578                 fprintf(stderr, ", orphan file extent");
579         fprintf(stderr, "\n");
580         /* Print the orphan extents if needed */
581         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
583
584         /* Print the holes if needed */
585         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586                 struct file_extent_hole *hole;
587                 struct rb_node *node;
588                 int found = 0;
589
590                 node = rb_first(&rec->holes);
591                 fprintf(stderr, "Found file extent holes:\n");
592                 while (node) {
593                         found = 1;
594                         hole = rb_entry(node, struct file_extent_hole, node);
595                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
596                                 hole->start, hole->len);
597                         node = rb_next(node);
598                 }
599                 if (!found)
600                         fprintf(stderr, "\tstart: 0, len: %llu\n",
601                                 round_up(rec->isize,
602                                          root->fs_info->sectorsize));
603         }
604 }
605
606 static void print_ref_error(int errors)
607 {
608         if (errors & REF_ERR_NO_DIR_ITEM)
609                 fprintf(stderr, ", no dir item");
610         if (errors & REF_ERR_NO_DIR_INDEX)
611                 fprintf(stderr, ", no dir index");
612         if (errors & REF_ERR_NO_INODE_REF)
613                 fprintf(stderr, ", no inode ref");
614         if (errors & REF_ERR_DUP_DIR_ITEM)
615                 fprintf(stderr, ", dup dir item");
616         if (errors & REF_ERR_DUP_DIR_INDEX)
617                 fprintf(stderr, ", dup dir index");
618         if (errors & REF_ERR_DUP_INODE_REF)
619                 fprintf(stderr, ", dup inode ref");
620         if (errors & REF_ERR_INDEX_UNMATCH)
621                 fprintf(stderr, ", index mismatch");
622         if (errors & REF_ERR_FILETYPE_UNMATCH)
623                 fprintf(stderr, ", filetype mismatch");
624         if (errors & REF_ERR_NAME_TOO_LONG)
625                 fprintf(stderr, ", name too long");
626         if (errors & REF_ERR_NO_ROOT_REF)
627                 fprintf(stderr, ", no root ref");
628         if (errors & REF_ERR_NO_ROOT_BACKREF)
629                 fprintf(stderr, ", no root backref");
630         if (errors & REF_ERR_DUP_ROOT_REF)
631                 fprintf(stderr, ", dup root ref");
632         if (errors & REF_ERR_DUP_ROOT_BACKREF)
633                 fprintf(stderr, ", dup root backref");
634         fprintf(stderr, "\n");
635 }
636
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
638                                           u64 ino, int mod)
639 {
640         struct ptr_node *node;
641         struct cache_extent *cache;
642         struct inode_record *rec = NULL;
643         int ret;
644
645         cache = lookup_cache_extent(inode_cache, ino, 1);
646         if (cache) {
647                 node = container_of(cache, struct ptr_node, cache);
648                 rec = node->data;
649                 if (mod && rec->refs > 1) {
650                         node->data = clone_inode_rec(rec);
651                         if (IS_ERR(node->data))
652                                 return node->data;
653                         rec->refs--;
654                         rec = node->data;
655                 }
656         } else if (mod) {
657                 rec = calloc(1, sizeof(*rec));
658                 if (!rec)
659                         return ERR_PTR(-ENOMEM);
660                 rec->ino = ino;
661                 rec->extent_start = (u64)-1;
662                 rec->refs = 1;
663                 INIT_LIST_HEAD(&rec->backrefs);
664                 INIT_LIST_HEAD(&rec->orphan_extents);
665                 rec->holes = RB_ROOT;
666
667                 node = malloc(sizeof(*node));
668                 if (!node) {
669                         free(rec);
670                         return ERR_PTR(-ENOMEM);
671                 }
672                 node->cache.start = ino;
673                 node->cache.size = 1;
674                 node->data = rec;
675
676                 if (ino == BTRFS_FREE_INO_OBJECTID)
677                         rec->found_link = 1;
678
679                 ret = insert_cache_extent(inode_cache, &node->cache);
680                 if (ret)
681                         return ERR_PTR(-EEXIST);
682         }
683         return rec;
684 }
685
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
687 {
688         struct orphan_data_extent *orphan;
689
690         while (!list_empty(orphan_extents)) {
691                 orphan = list_entry(orphan_extents->next,
692                                     struct orphan_data_extent, list);
693                 list_del(&orphan->list);
694                 free(orphan);
695         }
696 }
697
698 static void free_inode_rec(struct inode_record *rec)
699 {
700         struct inode_backref *backref;
701
702         if (--rec->refs > 0)
703                 return;
704
705         while (!list_empty(&rec->backrefs)) {
706                 backref = to_inode_backref(rec->backrefs.next);
707                 list_del(&backref->list);
708                 free(backref);
709         }
710         free_orphan_data_extents(&rec->orphan_extents);
711         free_file_extent_holes(&rec->holes);
712         free(rec);
713 }
714
715 static int can_free_inode_rec(struct inode_record *rec)
716 {
717         if (!rec->errors && rec->checked && rec->found_inode_item &&
718             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
719                 return 1;
720         return 0;
721 }
722
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724                                  struct inode_record *rec)
725 {
726         struct cache_extent *cache;
727         struct inode_backref *tmp, *backref;
728         struct ptr_node *node;
729         u8 filetype;
730
731         if (!rec->found_inode_item)
732                 return;
733
734         filetype = imode_to_type(rec->imode);
735         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736                 if (backref->found_dir_item && backref->found_dir_index) {
737                         if (backref->filetype != filetype)
738                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739                         if (!backref->errors && backref->found_inode_ref &&
740                             rec->nlink == rec->found_link) {
741                                 list_del(&backref->list);
742                                 free(backref);
743                         }
744                 }
745         }
746
747         if (!rec->checked || rec->merging)
748                 return;
749
750         if (S_ISDIR(rec->imode)) {
751                 if (rec->found_size != rec->isize)
752                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753                 if (rec->found_file_extent)
754                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
755         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756                 if (rec->found_dir_item)
757                         rec->errors |= I_ERR_ODD_DIR_ITEM;
758                 if (rec->found_size != rec->nbytes)
759                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
760                 if (rec->nlink > 0 && !no_holes &&
761                     (rec->extent_end < rec->isize ||
762                      first_extent_gap(&rec->holes) < rec->isize))
763                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
764         }
765
766         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767                 if (rec->found_csum_item && rec->nodatasum)
768                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
769                 if (rec->some_csum_missing && !rec->nodatasum)
770                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
771         }
772
773         BUG_ON(rec->refs != 1);
774         if (can_free_inode_rec(rec)) {
775                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776                 node = container_of(cache, struct ptr_node, cache);
777                 BUG_ON(node->data != rec);
778                 remove_cache_extent(inode_cache, &node->cache);
779                 free(node);
780                 free_inode_rec(rec);
781         }
782 }
783
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
785 {
786         struct btrfs_path path;
787         struct btrfs_key key;
788         int ret;
789
790         key.objectid = BTRFS_ORPHAN_OBJECTID;
791         key.type = BTRFS_ORPHAN_ITEM_KEY;
792         key.offset = ino;
793
794         btrfs_init_path(&path);
795         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796         btrfs_release_path(&path);
797         if (ret > 0)
798                 ret = -ENOENT;
799         return ret;
800 }
801
802 static int process_inode_item(struct extent_buffer *eb,
803                               int slot, struct btrfs_key *key,
804                               struct shared_node *active_node)
805 {
806         struct inode_record *rec;
807         struct btrfs_inode_item *item;
808
809         rec = active_node->current;
810         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811         if (rec->found_inode_item) {
812                 rec->errors |= I_ERR_DUP_INODE_ITEM;
813                 return 1;
814         }
815         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816         rec->nlink = btrfs_inode_nlink(eb, item);
817         rec->isize = btrfs_inode_size(eb, item);
818         rec->nbytes = btrfs_inode_nbytes(eb, item);
819         rec->imode = btrfs_inode_mode(eb, item);
820         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
821                 rec->nodatasum = 1;
822         rec->found_inode_item = 1;
823         if (rec->nlink == 0)
824                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825         maybe_free_inode_rec(&active_node->inode_cache, rec);
826         return 0;
827 }
828
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
830                                                 const char *name,
831                                                 int namelen, u64 dir)
832 {
833         struct inode_backref *backref;
834
835         list_for_each_entry(backref, &rec->backrefs, list) {
836                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
837                         break;
838                 if (backref->dir != dir || backref->namelen != namelen)
839                         continue;
840                 if (memcmp(name, backref->name, namelen))
841                         continue;
842                 return backref;
843         }
844
845         backref = malloc(sizeof(*backref) + namelen + 1);
846         if (!backref)
847                 return NULL;
848         memset(backref, 0, sizeof(*backref));
849         backref->dir = dir;
850         backref->namelen = namelen;
851         memcpy(backref->name, name, namelen);
852         backref->name[namelen] = '\0';
853         list_add_tail(&backref->list, &rec->backrefs);
854         return backref;
855 }
856
857 static int add_inode_backref(struct cache_tree *inode_cache,
858                              u64 ino, u64 dir, u64 index,
859                              const char *name, int namelen,
860                              u8 filetype, u8 itemtype, int errors)
861 {
862         struct inode_record *rec;
863         struct inode_backref *backref;
864
865         rec = get_inode_rec(inode_cache, ino, 1);
866         BUG_ON(IS_ERR(rec));
867         backref = get_inode_backref(rec, name, namelen, dir);
868         BUG_ON(!backref);
869         if (errors)
870                 backref->errors |= errors;
871         if (itemtype == BTRFS_DIR_INDEX_KEY) {
872                 if (backref->found_dir_index)
873                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
874                 if (backref->found_inode_ref && backref->index != index)
875                         backref->errors |= REF_ERR_INDEX_UNMATCH;
876                 if (backref->found_dir_item && backref->filetype != filetype)
877                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
878
879                 backref->index = index;
880                 backref->filetype = filetype;
881                 backref->found_dir_index = 1;
882         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
883                 rec->found_link++;
884                 if (backref->found_dir_item)
885                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
886                 if (backref->found_dir_index && backref->filetype != filetype)
887                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
888
889                 backref->filetype = filetype;
890                 backref->found_dir_item = 1;
891         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893                 if (backref->found_inode_ref)
894                         backref->errors |= REF_ERR_DUP_INODE_REF;
895                 if (backref->found_dir_index && backref->index != index)
896                         backref->errors |= REF_ERR_INDEX_UNMATCH;
897                 else
898                         backref->index = index;
899
900                 backref->ref_type = itemtype;
901                 backref->found_inode_ref = 1;
902         } else {
903                 BUG_ON(1);
904         }
905
906         maybe_free_inode_rec(inode_cache, rec);
907         return 0;
908 }
909
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911                             struct cache_tree *dst_cache)
912 {
913         struct inode_backref *backref;
914         u32 dir_count = 0;
915         int ret = 0;
916
917         dst->merging = 1;
918         list_for_each_entry(backref, &src->backrefs, list) {
919                 if (backref->found_dir_index) {
920                         add_inode_backref(dst_cache, dst->ino, backref->dir,
921                                         backref->index, backref->name,
922                                         backref->namelen, backref->filetype,
923                                         BTRFS_DIR_INDEX_KEY, backref->errors);
924                 }
925                 if (backref->found_dir_item) {
926                         dir_count++;
927                         add_inode_backref(dst_cache, dst->ino,
928                                         backref->dir, 0, backref->name,
929                                         backref->namelen, backref->filetype,
930                                         BTRFS_DIR_ITEM_KEY, backref->errors);
931                 }
932                 if (backref->found_inode_ref) {
933                         add_inode_backref(dst_cache, dst->ino,
934                                         backref->dir, backref->index,
935                                         backref->name, backref->namelen, 0,
936                                         backref->ref_type, backref->errors);
937                 }
938         }
939
940         if (src->found_dir_item)
941                 dst->found_dir_item = 1;
942         if (src->found_file_extent)
943                 dst->found_file_extent = 1;
944         if (src->found_csum_item)
945                 dst->found_csum_item = 1;
946         if (src->some_csum_missing)
947                 dst->some_csum_missing = 1;
948         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
950                 if (ret < 0)
951                         return ret;
952         }
953
954         BUG_ON(src->found_link < dir_count);
955         dst->found_link += src->found_link - dir_count;
956         dst->found_size += src->found_size;
957         if (src->extent_start != (u64)-1) {
958                 if (dst->extent_start == (u64)-1) {
959                         dst->extent_start = src->extent_start;
960                         dst->extent_end = src->extent_end;
961                 } else {
962                         if (dst->extent_end > src->extent_start)
963                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964                         else if (dst->extent_end < src->extent_start) {
965                                 ret = add_file_extent_hole(&dst->holes,
966                                         dst->extent_end,
967                                         src->extent_start - dst->extent_end);
968                         }
969                         if (dst->extent_end < src->extent_end)
970                                 dst->extent_end = src->extent_end;
971                 }
972         }
973
974         dst->errors |= src->errors;
975         if (src->found_inode_item) {
976                 if (!dst->found_inode_item) {
977                         dst->nlink = src->nlink;
978                         dst->isize = src->isize;
979                         dst->nbytes = src->nbytes;
980                         dst->imode = src->imode;
981                         dst->nodatasum = src->nodatasum;
982                         dst->found_inode_item = 1;
983                 } else {
984                         dst->errors |= I_ERR_DUP_INODE_ITEM;
985                 }
986         }
987         dst->merging = 0;
988
989         return 0;
990 }
991
992 static int splice_shared_node(struct shared_node *src_node,
993                               struct shared_node *dst_node)
994 {
995         struct cache_extent *cache;
996         struct ptr_node *node, *ins;
997         struct cache_tree *src, *dst;
998         struct inode_record *rec, *conflict;
999         u64 current_ino = 0;
1000         int splice = 0;
1001         int ret;
1002
1003         if (--src_node->refs == 0)
1004                 splice = 1;
1005         if (src_node->current)
1006                 current_ino = src_node->current->ino;
1007
1008         src = &src_node->root_cache;
1009         dst = &dst_node->root_cache;
1010 again:
1011         cache = search_cache_extent(src, 0);
1012         while (cache) {
1013                 node = container_of(cache, struct ptr_node, cache);
1014                 rec = node->data;
1015                 cache = next_cache_extent(cache);
1016
1017                 if (splice) {
1018                         remove_cache_extent(src, &node->cache);
1019                         ins = node;
1020                 } else {
1021                         ins = malloc(sizeof(*ins));
1022                         BUG_ON(!ins);
1023                         ins->cache.start = node->cache.start;
1024                         ins->cache.size = node->cache.size;
1025                         ins->data = rec;
1026                         rec->refs++;
1027                 }
1028                 ret = insert_cache_extent(dst, &ins->cache);
1029                 if (ret == -EEXIST) {
1030                         conflict = get_inode_rec(dst, rec->ino, 1);
1031                         BUG_ON(IS_ERR(conflict));
1032                         merge_inode_recs(rec, conflict, dst);
1033                         if (rec->checked) {
1034                                 conflict->checked = 1;
1035                                 if (dst_node->current == conflict)
1036                                         dst_node->current = NULL;
1037                         }
1038                         maybe_free_inode_rec(dst, conflict);
1039                         free_inode_rec(rec);
1040                         free(ins);
1041                 } else {
1042                         BUG_ON(ret);
1043                 }
1044         }
1045
1046         if (src == &src_node->root_cache) {
1047                 src = &src_node->inode_cache;
1048                 dst = &dst_node->inode_cache;
1049                 goto again;
1050         }
1051
1052         if (current_ino > 0 && (!dst_node->current ||
1053             current_ino > dst_node->current->ino)) {
1054                 if (dst_node->current) {
1055                         dst_node->current->checked = 1;
1056                         maybe_free_inode_rec(dst, dst_node->current);
1057                 }
1058                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059                 BUG_ON(IS_ERR(dst_node->current));
1060         }
1061         return 0;
1062 }
1063
1064 static void free_inode_ptr(struct cache_extent *cache)
1065 {
1066         struct ptr_node *node;
1067         struct inode_record *rec;
1068
1069         node = container_of(cache, struct ptr_node, cache);
1070         rec = node->data;
1071         free_inode_rec(rec);
1072         free(node);
1073 }
1074
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1076
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1078                                             u64 bytenr)
1079 {
1080         struct cache_extent *cache;
1081         struct shared_node *node;
1082
1083         cache = lookup_cache_extent(shared, bytenr, 1);
1084         if (cache) {
1085                 node = container_of(cache, struct shared_node, cache);
1086                 return node;
1087         }
1088         return NULL;
1089 }
1090
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1092 {
1093         int ret;
1094         struct shared_node *node;
1095
1096         node = calloc(1, sizeof(*node));
1097         if (!node)
1098                 return -ENOMEM;
1099         node->cache.start = bytenr;
1100         node->cache.size = 1;
1101         cache_tree_init(&node->root_cache);
1102         cache_tree_init(&node->inode_cache);
1103         node->refs = refs;
1104
1105         ret = insert_cache_extent(shared, &node->cache);
1106
1107         return ret;
1108 }
1109
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111                              struct walk_control *wc, int level)
1112 {
1113         struct shared_node *node;
1114         struct shared_node *dest;
1115         int ret;
1116
1117         if (level == wc->active_node)
1118                 return 0;
1119
1120         BUG_ON(wc->active_node <= level);
1121         node = find_shared_node(&wc->shared, bytenr);
1122         if (!node) {
1123                 ret = add_shared_node(&wc->shared, bytenr, refs);
1124                 BUG_ON(ret);
1125                 node = find_shared_node(&wc->shared, bytenr);
1126                 wc->nodes[level] = node;
1127                 wc->active_node = level;
1128                 return 0;
1129         }
1130
1131         if (wc->root_level == wc->active_node &&
1132             btrfs_root_refs(&root->root_item) == 0) {
1133                 if (--node->refs == 0) {
1134                         free_inode_recs_tree(&node->root_cache);
1135                         free_inode_recs_tree(&node->inode_cache);
1136                         remove_cache_extent(&wc->shared, &node->cache);
1137                         free(node);
1138                 }
1139                 return 1;
1140         }
1141
1142         dest = wc->nodes[wc->active_node];
1143         splice_shared_node(node, dest);
1144         if (node->refs == 0) {
1145                 remove_cache_extent(&wc->shared, &node->cache);
1146                 free(node);
1147         }
1148         return 1;
1149 }
1150
1151 static int leave_shared_node(struct btrfs_root *root,
1152                              struct walk_control *wc, int level)
1153 {
1154         struct shared_node *node;
1155         struct shared_node *dest;
1156         int i;
1157
1158         if (level == wc->root_level)
1159                 return 0;
1160
1161         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1162                 if (wc->nodes[i])
1163                         break;
1164         }
1165         BUG_ON(i >= BTRFS_MAX_LEVEL);
1166
1167         node = wc->nodes[wc->active_node];
1168         wc->nodes[wc->active_node] = NULL;
1169         wc->active_node = i;
1170
1171         dest = wc->nodes[wc->active_node];
1172         if (wc->active_node < wc->root_level ||
1173             btrfs_root_refs(&root->root_item) > 0) {
1174                 BUG_ON(node->refs <= 1);
1175                 splice_shared_node(node, dest);
1176         } else {
1177                 BUG_ON(node->refs < 2);
1178                 node->refs--;
1179         }
1180         return 0;
1181 }
1182
1183 /*
1184  * Returns:
1185  * < 0 - on error
1186  * 1   - if the root with id child_root_id is a child of root parent_root_id
1187  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1188  *       has other root(s) as parent(s)
1189  * 2   - if the root child_root_id doesn't have any parent roots
1190  */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1192                          u64 child_root_id)
1193 {
1194         struct btrfs_path path;
1195         struct btrfs_key key;
1196         struct extent_buffer *leaf;
1197         int has_parent = 0;
1198         int ret;
1199
1200         btrfs_init_path(&path);
1201
1202         key.objectid = parent_root_id;
1203         key.type = BTRFS_ROOT_REF_KEY;
1204         key.offset = child_root_id;
1205         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1206                                 0, 0);
1207         if (ret < 0)
1208                 return ret;
1209         btrfs_release_path(&path);
1210         if (!ret)
1211                 return 1;
1212
1213         key.objectid = child_root_id;
1214         key.type = BTRFS_ROOT_BACKREF_KEY;
1215         key.offset = 0;
1216         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1217                                 0, 0);
1218         if (ret < 0)
1219                 goto out;
1220
1221         while (1) {
1222                 leaf = path.nodes[0];
1223                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1225                         if (ret)
1226                                 break;
1227                         leaf = path.nodes[0];
1228                 }
1229
1230                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231                 if (key.objectid != child_root_id ||
1232                     key.type != BTRFS_ROOT_BACKREF_KEY)
1233                         break;
1234
1235                 has_parent = 1;
1236
1237                 if (key.offset == parent_root_id) {
1238                         btrfs_release_path(&path);
1239                         return 1;
1240                 }
1241
1242                 path.slots[0]++;
1243         }
1244 out:
1245         btrfs_release_path(&path);
1246         if (ret < 0)
1247                 return ret;
1248         return has_parent ? 0 : 2;
1249 }
1250
1251 static int process_dir_item(struct extent_buffer *eb,
1252                             int slot, struct btrfs_key *key,
1253                             struct shared_node *active_node)
1254 {
1255         u32 total;
1256         u32 cur = 0;
1257         u32 len;
1258         u32 name_len;
1259         u32 data_len;
1260         int error;
1261         int nritems = 0;
1262         u8 filetype;
1263         struct btrfs_dir_item *di;
1264         struct inode_record *rec;
1265         struct cache_tree *root_cache;
1266         struct cache_tree *inode_cache;
1267         struct btrfs_key location;
1268         char namebuf[BTRFS_NAME_LEN];
1269
1270         root_cache = &active_node->root_cache;
1271         inode_cache = &active_node->inode_cache;
1272         rec = active_node->current;
1273         rec->found_dir_item = 1;
1274
1275         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276         total = btrfs_item_size_nr(eb, slot);
1277         while (cur < total) {
1278                 nritems++;
1279                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280                 name_len = btrfs_dir_name_len(eb, di);
1281                 data_len = btrfs_dir_data_len(eb, di);
1282                 filetype = btrfs_dir_type(eb, di);
1283
1284                 rec->found_size += name_len;
1285                 if (cur + sizeof(*di) + name_len > total ||
1286                     name_len > BTRFS_NAME_LEN) {
1287                         error = REF_ERR_NAME_TOO_LONG;
1288
1289                         if (cur + sizeof(*di) > total)
1290                                 break;
1291                         len = min_t(u32, total - cur - sizeof(*di),
1292                                     BTRFS_NAME_LEN);
1293                 } else {
1294                         len = name_len;
1295                         error = 0;
1296                 }
1297
1298                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1299
1300                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301                     key->offset != btrfs_name_hash(namebuf, len)) {
1302                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1303                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304                         key->objectid, key->offset, namebuf, len, filetype,
1305                         key->offset, btrfs_name_hash(namebuf, len));
1306                 }
1307
1308                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309                         add_inode_backref(inode_cache, location.objectid,
1310                                           key->objectid, key->offset, namebuf,
1311                                           len, filetype, key->type, error);
1312                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313                         add_inode_backref(root_cache, location.objectid,
1314                                           key->objectid, key->offset,
1315                                           namebuf, len, filetype,
1316                                           key->type, error);
1317                 } else {
1318                         fprintf(stderr,
1319                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320                                 location.type, key->objectid, key->offset);
1321                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322                                           key->objectid, key->offset, namebuf,
1323                                           len, filetype, key->type, error);
1324                 }
1325
1326                 len = sizeof(*di) + name_len + data_len;
1327                 di = (struct btrfs_dir_item *)((char *)di + len);
1328                 cur += len;
1329         }
1330         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1332
1333         return 0;
1334 }
1335
1336 static int process_inode_ref(struct extent_buffer *eb,
1337                              int slot, struct btrfs_key *key,
1338                              struct shared_node *active_node)
1339 {
1340         u32 total;
1341         u32 cur = 0;
1342         u32 len;
1343         u32 name_len;
1344         u64 index;
1345         int error;
1346         struct cache_tree *inode_cache;
1347         struct btrfs_inode_ref *ref;
1348         char namebuf[BTRFS_NAME_LEN];
1349
1350         inode_cache = &active_node->inode_cache;
1351
1352         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353         total = btrfs_item_size_nr(eb, slot);
1354         while (cur < total) {
1355                 name_len = btrfs_inode_ref_name_len(eb, ref);
1356                 index = btrfs_inode_ref_index(eb, ref);
1357
1358                 /* inode_ref + namelen should not cross item boundary */
1359                 if (cur + sizeof(*ref) + name_len > total ||
1360                     name_len > BTRFS_NAME_LEN) {
1361                         if (total < cur + sizeof(*ref))
1362                                 break;
1363
1364                         /* Still try to read out the remaining part */
1365                         len = min_t(u32, total - cur - sizeof(*ref),
1366                                     BTRFS_NAME_LEN);
1367                         error = REF_ERR_NAME_TOO_LONG;
1368                 } else {
1369                         len = name_len;
1370                         error = 0;
1371                 }
1372
1373                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374                 add_inode_backref(inode_cache, key->objectid, key->offset,
1375                                   index, namebuf, len, 0, key->type, error);
1376
1377                 len = sizeof(*ref) + name_len;
1378                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1379                 cur += len;
1380         }
1381         return 0;
1382 }
1383
1384 static int process_inode_extref(struct extent_buffer *eb,
1385                                 int slot, struct btrfs_key *key,
1386                                 struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u64 index;
1393         u64 parent;
1394         int error;
1395         struct cache_tree *inode_cache;
1396         struct btrfs_inode_extref *extref;
1397         char namebuf[BTRFS_NAME_LEN];
1398
1399         inode_cache = &active_node->inode_cache;
1400
1401         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402         total = btrfs_item_size_nr(eb, slot);
1403         while (cur < total) {
1404                 name_len = btrfs_inode_extref_name_len(eb, extref);
1405                 index = btrfs_inode_extref_index(eb, extref);
1406                 parent = btrfs_inode_extref_parent(eb, extref);
1407                 if (name_len <= BTRFS_NAME_LEN) {
1408                         len = name_len;
1409                         error = 0;
1410                 } else {
1411                         len = BTRFS_NAME_LEN;
1412                         error = REF_ERR_NAME_TOO_LONG;
1413                 }
1414                 read_extent_buffer(eb, namebuf,
1415                                    (unsigned long)(extref + 1), len);
1416                 add_inode_backref(inode_cache, key->objectid, parent,
1417                                   index, namebuf, len, 0, key->type, error);
1418
1419                 len = sizeof(*extref) + name_len;
1420                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1421                 cur += len;
1422         }
1423         return 0;
1424
1425 }
1426
1427 static int process_file_extent(struct btrfs_root *root,
1428                                 struct extent_buffer *eb,
1429                                 int slot, struct btrfs_key *key,
1430                                 struct shared_node *active_node)
1431 {
1432         struct inode_record *rec;
1433         struct btrfs_file_extent_item *fi;
1434         u64 num_bytes = 0;
1435         u64 disk_bytenr = 0;
1436         u64 extent_offset = 0;
1437         u64 mask = root->fs_info->sectorsize - 1;
1438         int extent_type;
1439         int ret;
1440
1441         rec = active_node->current;
1442         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1443         rec->found_file_extent = 1;
1444
1445         if (rec->extent_start == (u64)-1) {
1446                 rec->extent_start = key->offset;
1447                 rec->extent_end = key->offset;
1448         }
1449
1450         if (rec->extent_end > key->offset)
1451                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1452         else if (rec->extent_end < key->offset) {
1453                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1454                                            key->offset - rec->extent_end);
1455                 if (ret < 0)
1456                         return ret;
1457         }
1458
1459         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1460         extent_type = btrfs_file_extent_type(eb, fi);
1461
1462         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1463                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1464                 if (num_bytes == 0)
1465                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1466                 rec->found_size += num_bytes;
1467                 num_bytes = (num_bytes + mask) & ~mask;
1468         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1469                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1470                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1471                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1472                 extent_offset = btrfs_file_extent_offset(eb, fi);
1473                 if (num_bytes == 0 || (num_bytes & mask))
1474                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1475                 if (num_bytes + extent_offset >
1476                     btrfs_file_extent_ram_bytes(eb, fi))
1477                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1478                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1479                     (btrfs_file_extent_compression(eb, fi) ||
1480                      btrfs_file_extent_encryption(eb, fi) ||
1481                      btrfs_file_extent_other_encoding(eb, fi)))
1482                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1483                 if (disk_bytenr > 0)
1484                         rec->found_size += num_bytes;
1485         } else {
1486                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1487         }
1488         rec->extent_end = key->offset + num_bytes;
1489
1490         /*
1491          * The data reloc tree will copy full extents into its inode and then
1492          * copy the corresponding csums.  Because the extent it copied could be
1493          * a preallocated extent that hasn't been written to yet there may be no
1494          * csums to copy, ergo we won't have csums for our file extent.  This is
1495          * ok so just don't bother checking csums if the inode belongs to the
1496          * data reloc tree.
1497          */
1498         if (disk_bytenr > 0 &&
1499             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1500                 u64 found;
1501                 if (btrfs_file_extent_compression(eb, fi))
1502                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1503                 else
1504                         disk_bytenr += extent_offset;
1505
1506                 ret = count_csum_range(root->fs_info, disk_bytenr, num_bytes,
1507                                        &found);
1508                 if (ret < 0)
1509                         return ret;
1510                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1511                         if (found > 0)
1512                                 rec->found_csum_item = 1;
1513                         if (found < num_bytes)
1514                                 rec->some_csum_missing = 1;
1515                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1516                         if (found > 0)
1517                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1518                 }
1519         }
1520         return 0;
1521 }
1522
1523 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1524                             struct walk_control *wc)
1525 {
1526         struct btrfs_key key;
1527         u32 nritems;
1528         int i;
1529         int ret = 0;
1530         struct cache_tree *inode_cache;
1531         struct shared_node *active_node;
1532
1533         if (wc->root_level == wc->active_node &&
1534             btrfs_root_refs(&root->root_item) == 0)
1535                 return 0;
1536
1537         active_node = wc->nodes[wc->active_node];
1538         inode_cache = &active_node->inode_cache;
1539         nritems = btrfs_header_nritems(eb);
1540         for (i = 0; i < nritems; i++) {
1541                 btrfs_item_key_to_cpu(eb, &key, i);
1542
1543                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1544                         continue;
1545                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1546                         continue;
1547
1548                 if (active_node->current == NULL ||
1549                     active_node->current->ino < key.objectid) {
1550                         if (active_node->current) {
1551                                 active_node->current->checked = 1;
1552                                 maybe_free_inode_rec(inode_cache,
1553                                                      active_node->current);
1554                         }
1555                         active_node->current = get_inode_rec(inode_cache,
1556                                                              key.objectid, 1);
1557                         BUG_ON(IS_ERR(active_node->current));
1558                 }
1559                 switch (key.type) {
1560                 case BTRFS_DIR_ITEM_KEY:
1561                 case BTRFS_DIR_INDEX_KEY:
1562                         ret = process_dir_item(eb, i, &key, active_node);
1563                         break;
1564                 case BTRFS_INODE_REF_KEY:
1565                         ret = process_inode_ref(eb, i, &key, active_node);
1566                         break;
1567                 case BTRFS_INODE_EXTREF_KEY:
1568                         ret = process_inode_extref(eb, i, &key, active_node);
1569                         break;
1570                 case BTRFS_INODE_ITEM_KEY:
1571                         ret = process_inode_item(eb, i, &key, active_node);
1572                         break;
1573                 case BTRFS_EXTENT_DATA_KEY:
1574                         ret = process_file_extent(root, eb, i, &key,
1575                                                   active_node);
1576                         break;
1577                 default:
1578                         break;
1579                 };
1580         }
1581         return ret;
1582 }
1583
1584 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1585                              struct extent_buffer *eb, struct node_refs *nrefs,
1586                              u64 level, int check_all);
1587 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1588                             unsigned int ext_ref);
1589
1590 /*
1591  * Returns >0  Found error, not fatal, should continue
1592  * Returns <0  Fatal error, must exit the whole check
1593  * Returns 0   No errors found
1594  */
1595 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1596                                struct node_refs *nrefs, int *level, int ext_ref)
1597 {
1598         struct extent_buffer *cur = path->nodes[0];
1599         struct btrfs_key key;
1600         u64 cur_bytenr;
1601         u32 nritems;
1602         u64 first_ino = 0;
1603         int root_level = btrfs_header_level(root->node);
1604         int i;
1605         int ret = 0; /* Final return value */
1606         int err = 0; /* Positive error bitmap */
1607
1608         cur_bytenr = cur->start;
1609
1610         /* skip to first inode item or the first inode number change */
1611         nritems = btrfs_header_nritems(cur);
1612         for (i = 0; i < nritems; i++) {
1613                 btrfs_item_key_to_cpu(cur, &key, i);
1614                 if (i == 0)
1615                         first_ino = key.objectid;
1616                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1617                     (first_ino && first_ino != key.objectid))
1618                         break;
1619         }
1620         if (i == nritems) {
1621                 path->slots[0] = nritems;
1622                 return 0;
1623         }
1624         path->slots[0] = i;
1625
1626 again:
1627         err |= check_inode_item(root, path, ext_ref);
1628
1629         /* modify cur since check_inode_item may change path */
1630         cur = path->nodes[0];
1631
1632         if (err & LAST_ITEM)
1633                 goto out;
1634
1635         /* still have inode items in thie leaf */
1636         if (cur->start == cur_bytenr)
1637                 goto again;
1638
1639         /*
1640          * we have switched to another leaf, above nodes may
1641          * have changed, here walk down the path, if a node
1642          * or leaf is shared, check whether we can skip this
1643          * node or leaf.
1644          */
1645         for (i = root_level; i >= 0; i--) {
1646                 if (path->nodes[i]->start == nrefs->bytenr[i])
1647                         continue;
1648
1649                 ret = update_nodes_refs(root, path->nodes[i]->start,
1650                                 path->nodes[i], nrefs, i, 0);
1651                 if (ret)
1652                         goto out;
1653
1654                 if (!nrefs->need_check[i]) {
1655                         *level += 1;
1656                         break;
1657                 }
1658         }
1659
1660         for (i = 0; i < *level; i++) {
1661                 free_extent_buffer(path->nodes[i]);
1662                 path->nodes[i] = NULL;
1663         }
1664 out:
1665         err &= ~LAST_ITEM;
1666         if (err && !ret)
1667                 ret = err;
1668         return ret;
1669 }
1670
1671 static void reada_walk_down(struct btrfs_root *root,
1672                             struct extent_buffer *node, int slot)
1673 {
1674         struct btrfs_fs_info *fs_info = root->fs_info;
1675         u64 bytenr;
1676         u64 ptr_gen;
1677         u32 nritems;
1678         int i;
1679         int level;
1680
1681         level = btrfs_header_level(node);
1682         if (level != 1)
1683                 return;
1684
1685         nritems = btrfs_header_nritems(node);
1686         for (i = slot; i < nritems; i++) {
1687                 bytenr = btrfs_node_blockptr(node, i);
1688                 ptr_gen = btrfs_node_ptr_generation(node, i);
1689                 readahead_tree_block(fs_info, bytenr, ptr_gen);
1690         }
1691 }
1692
1693 /*
1694  * Check the child node/leaf by the following condition:
1695  * 1. the first item key of the node/leaf should be the same with the one
1696  *    in parent.
1697  * 2. block in parent node should match the child node/leaf.
1698  * 3. generation of parent node and child's header should be consistent.
1699  *
1700  * Or the child node/leaf pointed by the key in parent is not valid.
1701  *
1702  * We hope to check leaf owner too, but since subvol may share leaves,
1703  * which makes leaf owner check not so strong, key check should be
1704  * sufficient enough for that case.
1705  */
1706 static int check_child_node(struct extent_buffer *parent, int slot,
1707                             struct extent_buffer *child)
1708 {
1709         struct btrfs_key parent_key;
1710         struct btrfs_key child_key;
1711         int ret = 0;
1712
1713         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1714         if (btrfs_header_level(child) == 0)
1715                 btrfs_item_key_to_cpu(child, &child_key, 0);
1716         else
1717                 btrfs_node_key_to_cpu(child, &child_key, 0);
1718
1719         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1720                 ret = -EINVAL;
1721                 fprintf(stderr,
1722                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1723                         parent_key.objectid, parent_key.type, parent_key.offset,
1724                         child_key.objectid, child_key.type, child_key.offset);
1725         }
1726         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1727                 ret = -EINVAL;
1728                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1729                         btrfs_node_blockptr(parent, slot),
1730                         btrfs_header_bytenr(child));
1731         }
1732         if (btrfs_node_ptr_generation(parent, slot) !=
1733             btrfs_header_generation(child)) {
1734                 ret = -EINVAL;
1735                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1736                         btrfs_header_generation(child),
1737                         btrfs_node_ptr_generation(parent, slot));
1738         }
1739         return ret;
1740 }
1741
1742 /*
1743  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1744  * in every fs or file tree check. Here we find its all root ids, and only check
1745  * it in the fs or file tree which has the smallest root id.
1746  */
1747 static int need_check(struct btrfs_root *root, struct ulist *roots)
1748 {
1749         struct rb_node *node;
1750         struct ulist_node *u;
1751
1752         /*
1753          * @roots can be empty if it belongs to tree reloc tree
1754          * In that case, we should always check the leaf, as we can't use
1755          * the tree owner to ensure some other root will check it.
1756          */
1757         if (roots->nnodes == 1 || roots->nnodes == 0)
1758                 return 1;
1759
1760         node = rb_first(&roots->root);
1761         u = rb_entry(node, struct ulist_node, rb_node);
1762         /*
1763          * current root id is not smallest, we skip it and let it be checked
1764          * in the fs or file tree who hash the smallest root id.
1765          */
1766         if (root->objectid != u->val)
1767                 return 0;
1768
1769         return 1;
1770 }
1771
1772 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1773                                u64 *flags_ret)
1774 {
1775         struct btrfs_root *extent_root = root->fs_info->extent_root;
1776         struct btrfs_root_item *ri = &root->root_item;
1777         struct btrfs_extent_inline_ref *iref;
1778         struct btrfs_extent_item *ei;
1779         struct btrfs_key key;
1780         struct btrfs_path *path = NULL;
1781         unsigned long ptr;
1782         unsigned long end;
1783         u64 flags;
1784         u64 owner = 0;
1785         u64 offset;
1786         int slot;
1787         int type;
1788         int ret = 0;
1789
1790         /*
1791          * Except file/reloc tree, we can not have FULL BACKREF MODE
1792          */
1793         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1794                 goto normal;
1795
1796         /* root node */
1797         if (eb->start == btrfs_root_bytenr(ri))
1798                 goto normal;
1799
1800         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1801                 goto full_backref;
1802
1803         owner = btrfs_header_owner(eb);
1804         if (owner == root->objectid)
1805                 goto normal;
1806
1807         path = btrfs_alloc_path();
1808         if (!path)
1809                 return -ENOMEM;
1810
1811         key.objectid = btrfs_header_bytenr(eb);
1812         key.type = (u8)-1;
1813         key.offset = (u64)-1;
1814
1815         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1816         if (ret <= 0) {
1817                 ret = -EIO;
1818                 goto out;
1819         }
1820
1821         if (ret > 0) {
1822                 ret = btrfs_previous_extent_item(extent_root, path,
1823                                                  key.objectid);
1824                 if (ret)
1825                         goto full_backref;
1826
1827         }
1828         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1829
1830         eb = path->nodes[0];
1831         slot = path->slots[0];
1832         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1833
1834         flags = btrfs_extent_flags(eb, ei);
1835         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1836                 goto full_backref;
1837
1838         ptr = (unsigned long)(ei + 1);
1839         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
1840
1841         if (key.type == BTRFS_EXTENT_ITEM_KEY)
1842                 ptr += sizeof(struct btrfs_tree_block_info);
1843
1844 next:
1845         /* Reached extent item ends normally */
1846         if (ptr == end)
1847                 goto full_backref;
1848
1849         /* Beyond extent item end, wrong item size */
1850         if (ptr > end) {
1851                 error("extent item at bytenr %llu slot %d has wrong size",
1852                         eb->start, slot);
1853                 goto full_backref;
1854         }
1855
1856         iref = (struct btrfs_extent_inline_ref *)ptr;
1857         offset = btrfs_extent_inline_ref_offset(eb, iref);
1858         type = btrfs_extent_inline_ref_type(eb, iref);
1859
1860         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1861                 goto normal;
1862         ptr += btrfs_extent_inline_ref_size(type);
1863         goto next;
1864
1865 normal:
1866         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1867         goto out;
1868
1869 full_backref:
1870         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1871 out:
1872         btrfs_free_path(path);
1873         return ret;
1874 }
1875
1876 /*
1877  * for a tree node or leaf, we record its reference count, so later if we still
1878  * process this node or leaf, don't need to compute its reference count again.
1879  *
1880  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
1881  */
1882 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1883                              struct extent_buffer *eb, struct node_refs *nrefs,
1884                              u64 level, int check_all)
1885 {
1886         struct ulist *roots;
1887         u64 refs = 0;
1888         u64 flags = 0;
1889         int root_level = btrfs_header_level(root->node);
1890         int check;
1891         int ret;
1892
1893         if (nrefs->bytenr[level] == bytenr)
1894                 return 0;
1895
1896         if (bytenr != (u64)-1) {
1897                 /* the return value of this function seems a mistake */
1898                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1899                                        level, 1, &refs, &flags);
1900                 /* temporary fix */
1901                 if (ret < 0 && !check_all)
1902                         return ret;
1903
1904                 nrefs->bytenr[level] = bytenr;
1905                 nrefs->refs[level] = refs;
1906                 nrefs->full_backref[level] = 0;
1907                 nrefs->checked[level] = 0;
1908
1909                 if (refs > 1) {
1910                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1911                                                    0, &roots);
1912                         if (ret)
1913                                 return -EIO;
1914
1915                         check = need_check(root, roots);
1916                         ulist_free(roots);
1917                         nrefs->need_check[level] = check;
1918                 } else {
1919                         if (!check_all) {
1920                                 nrefs->need_check[level] = 1;
1921                         } else {
1922                                 if (level == root_level) {
1923                                         nrefs->need_check[level] = 1;
1924                                 } else {
1925                                         /*
1926                                          * The node refs may have not been
1927                                          * updated if upper needs checking (the
1928                                          * lowest root_objectid) the node can
1929                                          * be checked.
1930                                          */
1931                                         nrefs->need_check[level] =
1932                                                 nrefs->need_check[level + 1];
1933                                 }
1934                         }
1935                 }
1936         }
1937
1938         if (check_all && eb) {
1939                 calc_extent_flag_v2(root, eb, &flags);
1940                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1941                         nrefs->full_backref[level] = 1;
1942         }
1943
1944         return 0;
1945 }
1946
1947 /*
1948  * @level           if @level == -1 means extent data item
1949  *                  else normal treeblocl.
1950  */
1951 static int should_check_extent_strictly(struct btrfs_root *root,
1952                                         struct node_refs *nrefs, int level)
1953 {
1954         int root_level = btrfs_header_level(root->node);
1955
1956         if (level > root_level || level < -1)
1957                 return 1;
1958         if (level == root_level)
1959                 return 1;
1960         /*
1961          * if the upper node is marked full backref, it should contain shared
1962          * backref of the parent (except owner == root->objectid).
1963          */
1964         while (++level <= root_level)
1965                 if (nrefs->refs[level] > 1)
1966                         return 0;
1967
1968         return 1;
1969 }
1970
1971 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
1972                           struct walk_control *wc, int *level,
1973                           struct node_refs *nrefs)
1974 {
1975         enum btrfs_tree_block_status status;
1976         u64 bytenr;
1977         u64 ptr_gen;
1978         struct btrfs_fs_info *fs_info = root->fs_info;
1979         struct extent_buffer *next;
1980         struct extent_buffer *cur;
1981         int ret, err = 0;
1982         u64 refs;
1983
1984         WARN_ON(*level < 0);
1985         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1986
1987         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
1988                 refs = nrefs->refs[*level];
1989                 ret = 0;
1990         } else {
1991                 ret = btrfs_lookup_extent_info(NULL, root,
1992                                        path->nodes[*level]->start,
1993                                        *level, 1, &refs, NULL);
1994                 if (ret < 0) {
1995                         err = ret;
1996                         goto out;
1997                 }
1998                 nrefs->bytenr[*level] = path->nodes[*level]->start;
1999                 nrefs->refs[*level] = refs;
2000         }
2001
2002         if (refs > 1) {
2003                 ret = enter_shared_node(root, path->nodes[*level]->start,
2004                                         refs, wc, *level);
2005                 if (ret > 0) {
2006                         err = ret;
2007                         goto out;
2008                 }
2009         }
2010
2011         while (*level >= 0) {
2012                 WARN_ON(*level < 0);
2013                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2014                 cur = path->nodes[*level];
2015
2016                 if (btrfs_header_level(cur) != *level)
2017                         WARN_ON(1);
2018
2019                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2020                         break;
2021                 if (*level == 0) {
2022                         ret = process_one_leaf(root, cur, wc);
2023                         if (ret < 0)
2024                                 err = ret;
2025                         break;
2026                 }
2027                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2028                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2029
2030                 if (bytenr == nrefs->bytenr[*level - 1]) {
2031                         refs = nrefs->refs[*level - 1];
2032                 } else {
2033                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2034                                         *level - 1, 1, &refs, NULL);
2035                         if (ret < 0) {
2036                                 refs = 0;
2037                         } else {
2038                                 nrefs->bytenr[*level - 1] = bytenr;
2039                                 nrefs->refs[*level - 1] = refs;
2040                         }
2041                 }
2042
2043                 if (refs > 1) {
2044                         ret = enter_shared_node(root, bytenr, refs,
2045                                                 wc, *level - 1);
2046                         if (ret > 0) {
2047                                 path->slots[*level]++;
2048                                 continue;
2049                         }
2050                 }
2051
2052                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2053                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2054                         free_extent_buffer(next);
2055                         reada_walk_down(root, cur, path->slots[*level]);
2056                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2057                         if (!extent_buffer_uptodate(next)) {
2058                                 struct btrfs_key node_key;
2059
2060                                 btrfs_node_key_to_cpu(path->nodes[*level],
2061                                                       &node_key,
2062                                                       path->slots[*level]);
2063                                 btrfs_add_corrupt_extent_record(root->fs_info,
2064                                                 &node_key,
2065                                                 path->nodes[*level]->start,
2066                                                 root->fs_info->nodesize,
2067                                                 *level);
2068                                 err = -EIO;
2069                                 goto out;
2070                         }
2071                 }
2072
2073                 ret = check_child_node(cur, path->slots[*level], next);
2074                 if (ret) {
2075                         free_extent_buffer(next);
2076                         err = ret;
2077                         goto out;
2078                 }
2079
2080                 if (btrfs_is_leaf(next))
2081                         status = btrfs_check_leaf(root, NULL, next);
2082                 else
2083                         status = btrfs_check_node(root, NULL, next);
2084                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2085                         free_extent_buffer(next);
2086                         err = -EIO;
2087                         goto out;
2088                 }
2089
2090                 *level = *level - 1;
2091                 free_extent_buffer(path->nodes[*level]);
2092                 path->nodes[*level] = next;
2093                 path->slots[*level] = 0;
2094         }
2095 out:
2096         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2097         return err;
2098 }
2099
2100 /*
2101  * Update global fs information.
2102  */
2103 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2104                          int level)
2105 {
2106         u32 free_nrs;
2107         struct extent_buffer *eb = path->nodes[level];
2108
2109         total_btree_bytes += eb->len;
2110         if (fs_root_objectid(root->objectid))
2111                 total_fs_tree_bytes += eb->len;
2112         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2113                 total_extent_tree_bytes += eb->len;
2114
2115         if (level == 0) {
2116                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2117         } else {
2118                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2119                             btrfs_header_nritems(eb));
2120                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2121         }
2122 }
2123
2124 /*
2125  * This function only handles BACKREF_MISSING,
2126  * If corresponding extent item exists, increase the ref, else insert an extent
2127  * item and backref.
2128  *
2129  * Returns error bits after repair.
2130  */
2131 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2132                                  struct btrfs_root *root,
2133                                  struct extent_buffer *node,
2134                                  struct node_refs *nrefs, int level, int err)
2135 {
2136         struct btrfs_fs_info *fs_info = root->fs_info;
2137         struct btrfs_root *extent_root = fs_info->extent_root;
2138         struct btrfs_path path;
2139         struct btrfs_extent_item *ei;
2140         struct btrfs_tree_block_info *bi;
2141         struct btrfs_key key;
2142         struct extent_buffer *eb;
2143         u32 size = sizeof(*ei);
2144         u32 node_size = root->fs_info->nodesize;
2145         int insert_extent = 0;
2146         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2147         int root_level = btrfs_header_level(root->node);
2148         int generation;
2149         int ret;
2150         u64 owner;
2151         u64 bytenr;
2152         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2153         u64 parent = 0;
2154
2155         if ((err & BACKREF_MISSING) == 0)
2156                 return err;
2157
2158         WARN_ON(level > BTRFS_MAX_LEVEL);
2159         WARN_ON(level < 0);
2160
2161         btrfs_init_path(&path);
2162         bytenr = btrfs_header_bytenr(node);
2163         owner = btrfs_header_owner(node);
2164         generation = btrfs_header_generation(node);
2165
2166         key.objectid = bytenr;
2167         key.type = (u8)-1;
2168         key.offset = (u64)-1;
2169
2170         /* Search for the extent item */
2171         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2172         if (ret <= 0) {
2173                 ret = -EIO;
2174                 goto out;
2175         }
2176
2177         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2178         if (ret)
2179                 insert_extent = 1;
2180
2181         /* calculate if the extent item flag is full backref or not */
2182         if (nrefs->full_backref[level] != 0)
2183                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2184
2185         /* insert an extent item */
2186         if (insert_extent) {
2187                 struct btrfs_disk_key copy_key;
2188
2189                 generation = btrfs_header_generation(node);
2190
2191                 if (level < root_level && nrefs->full_backref[level + 1] &&
2192                     owner != root->objectid) {
2193                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2194                 }
2195
2196                 key.objectid = bytenr;
2197                 if (!skinny_metadata) {
2198                         key.type = BTRFS_EXTENT_ITEM_KEY;
2199                         key.offset = node_size;
2200                         size += sizeof(*bi);
2201                 } else {
2202                         key.type = BTRFS_METADATA_ITEM_KEY;
2203                         key.offset = level;
2204                 }
2205
2206                 btrfs_release_path(&path);
2207                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2208                                               size);
2209                 if (ret)
2210                         goto out;
2211
2212                 eb = path.nodes[0];
2213                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2214
2215                 btrfs_set_extent_refs(eb, ei, 0);
2216                 btrfs_set_extent_generation(eb, ei, generation);
2217                 btrfs_set_extent_flags(eb, ei, flags);
2218
2219                 if (!skinny_metadata) {
2220                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2221                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2222                                              sizeof(*bi));
2223                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2224                         btrfs_set_disk_key_type(&copy_key, 0);
2225                         btrfs_set_disk_key_offset(&copy_key, 0);
2226
2227                         btrfs_set_tree_block_level(eb, bi, level);
2228                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2229                 }
2230                 btrfs_mark_buffer_dirty(eb);
2231                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2232                 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2233
2234                 nrefs->refs[level] = 0;
2235                 nrefs->full_backref[level] =
2236                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2237                 btrfs_release_path(&path);
2238         }
2239
2240         if (level < root_level && nrefs->full_backref[level + 1] &&
2241             owner != root->objectid)
2242                 parent = nrefs->bytenr[level + 1];
2243
2244         /* increase the ref */
2245         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2246                         parent, root->objectid, level, 0);
2247
2248         nrefs->refs[level]++;
2249 out:
2250         btrfs_release_path(&path);
2251         if (ret) {
2252                 error(
2253         "failed to repair tree block ref start %llu root %llu due to %s",
2254                       bytenr, root->objectid, strerror(-ret));
2255         } else {
2256                 printf("Added one tree block ref start %llu %s %llu\n",
2257                        bytenr, parent ? "parent" : "root",
2258                        parent ? parent : root->objectid);
2259                 err &= ~BACKREF_MISSING;
2260         }
2261
2262         return err;
2263 }
2264
/*
 * Forward declarations for walk_down_tree_v2() below, which calls
 * check_tree_block_ref() and check_leaf_items() before their definitions.
 * NOTE(review): check_inode_item() is already declared with the same
 * prototype earlier in this file, so repeating it here is redundant
 * (though harmless).
 */
2265 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2266                             unsigned int ext_ref);
2267 static int check_tree_block_ref(struct btrfs_root *root,
2268                                 struct extent_buffer *eb, u64 bytenr,
2269                                 int level, u64 owner, struct node_refs *nrefs);
2270 static int check_leaf_items(struct btrfs_trans_handle *trans,
2271                             struct btrfs_root *root, struct btrfs_path *path,
2272                             struct node_refs *nrefs, int account_bytes);
2273
2274 /*
2275  * @trans      just for lowmem repair mode
2276  * @check all  if not 0 then check all tree block backrefs and items
2277  *             0 then just check relationship of items in fs tree(s)
2278  *
2279  * Returns >0  Found error, should continue
2280  * Returns <0  Fatal error, must exit the whole check
2281  * Returns 0   No errors found
2282  */
2283 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2284                              struct btrfs_root *root, struct btrfs_path *path,
2285                              int *level, struct node_refs *nrefs, int ext_ref,
2286                              int check_all)
2287
2288 {
2289         enum btrfs_tree_block_status status;
2290         u64 bytenr;
2291         u64 ptr_gen;
2292         struct btrfs_fs_info *fs_info = root->fs_info;
2293         struct extent_buffer *next;
2294         struct extent_buffer *cur;
2295         int ret;
2296         int err = 0;
2297         int check;
2298         int account_file_data = 0;
2299
2300         WARN_ON(*level < 0);
2301         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2302
2303         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2304                                 path->nodes[*level], nrefs, *level, check_all);
2305         if (ret < 0)
2306                 return ret;
2307
2308         while (*level >= 0) {
2309                 WARN_ON(*level < 0);
2310                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2311                 cur = path->nodes[*level];
2312                 bytenr = btrfs_header_bytenr(cur);
2313                 check = nrefs->need_check[*level];
2314
2315                 if (btrfs_header_level(cur) != *level)
2316                         WARN_ON(1);
2317                /*
2318                 * Update bytes accounting and check tree block ref
2319                 * NOTE: Doing accounting and check before checking nritems
2320                 * is necessary because of empty node/leaf.
2321                 */
2322                 if ((check_all && !nrefs->checked[*level]) ||
2323                     (!check_all && nrefs->need_check[*level])) {
2324                         ret = check_tree_block_ref(root, cur,
2325                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2326                            btrfs_header_owner(cur), nrefs);
2327
2328                         if (repair && ret)
2329                                 ret = repair_tree_block_ref(trans, root,
2330                                     path->nodes[*level], nrefs, *level, ret);
2331                         err |= ret;
2332
2333                         if (check_all && nrefs->need_check[*level] &&
2334                                 nrefs->refs[*level]) {
2335                                 account_bytes(root, path, *level);
2336                                 account_file_data = 1;
2337                         }
2338                         nrefs->checked[*level] = 1;
2339                 }
2340
2341                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2342                         break;
2343
2344                 /* Don't forgot to check leaf/node validation */
2345                 if (*level == 0) {
2346                         /* skip duplicate check */
2347                         if (check || !check_all) {
2348                                 ret = btrfs_check_leaf(root, NULL, cur);
2349                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2350                                         err |= -EIO;
2351                                         break;
2352                                 }
2353                         }
2354
2355                         ret = 0;
2356                         if (!check_all)
2357                                 ret = process_one_leaf_v2(root, path, nrefs,
2358                                                           level, ext_ref);
2359                         else
2360                                 ret = check_leaf_items(trans, root, path,
2361                                                nrefs, account_file_data);
2362                         err |= ret;
2363                         break;
2364                 } else {
2365                         if (check || !check_all) {
2366                                 ret = btrfs_check_node(root, NULL, cur);
2367                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2368                                         err |= -EIO;
2369                                         break;
2370                                 }
2371                         }
2372                 }
2373
2374                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2375                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2376
2377                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2378                                         check_all);
2379                 if (ret < 0)
2380                         break;
2381                 /*
2382                  * check all trees in check_chunks_and_extent_v2
2383                  * check shared node once in check_fs_roots
2384                  */
2385                 if (!check_all && !nrefs->need_check[*level - 1]) {
2386                         path->slots[*level]++;
2387                         continue;
2388                 }
2389
2390                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2391                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2392                         free_extent_buffer(next);
2393                         reada_walk_down(root, cur, path->slots[*level]);
2394                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2395                         if (!extent_buffer_uptodate(next)) {
2396                                 struct btrfs_key node_key;
2397
2398                                 btrfs_node_key_to_cpu(path->nodes[*level],
2399                                                       &node_key,
2400                                                       path->slots[*level]);
2401                                 btrfs_add_corrupt_extent_record(fs_info,
2402                                         &node_key, path->nodes[*level]->start,
2403                                         fs_info->nodesize, *level);
2404                                 err |= -EIO;
2405                                 break;
2406                         }
2407                 }
2408
2409                 ret = check_child_node(cur, path->slots[*level], next);
2410                 err |= ret;
2411                 if (ret < 0) 
2412                         break;
2413
2414                 if (btrfs_is_leaf(next))
2415                         status = btrfs_check_leaf(root, NULL, next);
2416                 else
2417                         status = btrfs_check_node(root, NULL, next);
2418                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2419                         free_extent_buffer(next);
2420                         err |= -EIO;
2421                         break;
2422                 }
2423
2424                 *level = *level - 1;
2425                 free_extent_buffer(path->nodes[*level]);
2426                 path->nodes[*level] = next;
2427                 path->slots[*level] = 0;
2428                 account_file_data = 0;
2429
2430                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2431         }
2432         return err;
2433 }
2434
2435 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2436                         struct walk_control *wc, int *level)
2437 {
2438         int i;
2439         struct extent_buffer *leaf;
2440
2441         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2442                 leaf = path->nodes[i];
2443                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2444                         path->slots[i]++;
2445                         *level = i;
2446                         return 0;
2447                 } else {
2448                         free_extent_buffer(path->nodes[*level]);
2449                         path->nodes[*level] = NULL;
2450                         BUG_ON(*level > wc->active_node);
2451                         if (*level == wc->active_node)
2452                                 leave_shared_node(root, wc, *level);
2453                         *level = i + 1;
2454                 }
2455         }
2456         return 1;
2457 }
2458
2459 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2460                            int *level)
2461 {
2462         int i;
2463         struct extent_buffer *leaf;
2464
2465         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2466                 leaf = path->nodes[i];
2467                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2468                         path->slots[i]++;
2469                         *level = i;
2470                         return 0;
2471                 } else {
2472                         free_extent_buffer(path->nodes[*level]);
2473                         path->nodes[*level] = NULL;
2474                         *level = i + 1;
2475                 }
2476         }
2477         return 1;
2478 }
2479
2480 static int check_root_dir(struct inode_record *rec)
2481 {
2482         struct inode_backref *backref;
2483         int ret = -1;
2484
2485         if (!rec->found_inode_item || rec->errors)
2486                 goto out;
2487         if (rec->nlink != 1 || rec->found_link != 0)
2488                 goto out;
2489         if (list_empty(&rec->backrefs))
2490                 goto out;
2491         backref = to_inode_backref(rec->backrefs.next);
2492         if (!backref->found_inode_ref)
2493                 goto out;
2494         if (backref->index != 0 || backref->namelen != 2 ||
2495             memcmp(backref->name, "..", 2))
2496                 goto out;
2497         if (backref->found_dir_index || backref->found_dir_item)
2498                 goto out;
2499         ret = 0;
2500 out:
2501         return ret;
2502 }
2503
2504 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2505                               struct btrfs_root *root, struct btrfs_path *path,
2506                               struct inode_record *rec)
2507 {
2508         struct btrfs_inode_item *ei;
2509         struct btrfs_key key;
2510         int ret;
2511
2512         key.objectid = rec->ino;
2513         key.type = BTRFS_INODE_ITEM_KEY;
2514         key.offset = (u64)-1;
2515
2516         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2517         if (ret < 0)
2518                 goto out;
2519         if (ret) {
2520                 if (!path->slots[0]) {
2521                         ret = -ENOENT;
2522                         goto out;
2523                 }
2524                 path->slots[0]--;
2525                 ret = 0;
2526         }
2527         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2528         if (key.objectid != rec->ino) {
2529                 ret = -ENOENT;
2530                 goto out;
2531         }
2532
2533         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2534                             struct btrfs_inode_item);
2535         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2536         btrfs_mark_buffer_dirty(path->nodes[0]);
2537         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2538         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2539                root->root_key.objectid);
2540 out:
2541         btrfs_release_path(path);
2542         return ret;
2543 }
2544
2545 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2546                                     struct btrfs_root *root,
2547                                     struct btrfs_path *path,
2548                                     struct inode_record *rec)
2549 {
2550         int ret;
2551
2552         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2553         btrfs_release_path(path);
2554         if (!ret)
2555                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2556         return ret;
2557 }
2558
2559 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2560                                struct btrfs_root *root,
2561                                struct btrfs_path *path,
2562                                struct inode_record *rec)
2563 {
2564         struct btrfs_inode_item *ei;
2565         struct btrfs_key key;
2566         int ret = 0;
2567
2568         key.objectid = rec->ino;
2569         key.type = BTRFS_INODE_ITEM_KEY;
2570         key.offset = 0;
2571
2572         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2573         if (ret) {
2574                 if (ret > 0)
2575                         ret = -ENOENT;
2576                 goto out;
2577         }
2578
2579         /* Since ret == 0, no need to check anything */
2580         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2581                             struct btrfs_inode_item);
2582         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2583         btrfs_mark_buffer_dirty(path->nodes[0]);
2584         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2585         printf("reset nbytes for ino %llu root %llu\n",
2586                rec->ino, root->root_key.objectid);
2587 out:
2588         btrfs_release_path(path);
2589         return ret;
2590 }
2591
2592 static int add_missing_dir_index(struct btrfs_root *root,
2593                                  struct cache_tree *inode_cache,
2594                                  struct inode_record *rec,
2595                                  struct inode_backref *backref)
2596 {
2597         struct btrfs_path path;
2598         struct btrfs_trans_handle *trans;
2599         struct btrfs_dir_item *dir_item;
2600         struct extent_buffer *leaf;
2601         struct btrfs_key key;
2602         struct btrfs_disk_key disk_key;
2603         struct inode_record *dir_rec;
2604         unsigned long name_ptr;
2605         u32 data_size = sizeof(*dir_item) + backref->namelen;
2606         int ret;
2607
2608         trans = btrfs_start_transaction(root, 1);
2609         if (IS_ERR(trans))
2610                 return PTR_ERR(trans);
2611
2612         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2613                 (unsigned long long)rec->ino);
2614
2615         btrfs_init_path(&path);
2616         key.objectid = backref->dir;
2617         key.type = BTRFS_DIR_INDEX_KEY;
2618         key.offset = backref->index;
2619         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2620         BUG_ON(ret);
2621
2622         leaf = path.nodes[0];
2623         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2624
2625         disk_key.objectid = cpu_to_le64(rec->ino);
2626         disk_key.type = BTRFS_INODE_ITEM_KEY;
2627         disk_key.offset = 0;
2628
2629         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2630         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2631         btrfs_set_dir_data_len(leaf, dir_item, 0);
2632         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2633         name_ptr = (unsigned long)(dir_item + 1);
2634         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2635         btrfs_mark_buffer_dirty(leaf);
2636         btrfs_release_path(&path);
2637         btrfs_commit_transaction(trans, root);
2638
2639         backref->found_dir_index = 1;
2640         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2641         BUG_ON(IS_ERR(dir_rec));
2642         if (!dir_rec)
2643                 return 0;
2644         dir_rec->found_size += backref->namelen;
2645         if (dir_rec->found_size == dir_rec->isize &&
2646             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2647                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2648         if (dir_rec->found_size != dir_rec->isize)
2649                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2650
2651         return 0;
2652 }
2653
2654 static int delete_dir_index(struct btrfs_root *root,
2655                             struct inode_backref *backref)
2656 {
2657         struct btrfs_trans_handle *trans;
2658         struct btrfs_dir_item *di;
2659         struct btrfs_path path;
2660         int ret = 0;
2661
2662         trans = btrfs_start_transaction(root, 1);
2663         if (IS_ERR(trans))
2664                 return PTR_ERR(trans);
2665
2666         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2667                 (unsigned long long)backref->dir,
2668                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2669                 (unsigned long long)root->objectid);
2670
2671         btrfs_init_path(&path);
2672         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2673                                     backref->name, backref->namelen,
2674                                     backref->index, -1);
2675         if (IS_ERR(di)) {
2676                 ret = PTR_ERR(di);
2677                 btrfs_release_path(&path);
2678                 btrfs_commit_transaction(trans, root);
2679                 if (ret == -ENOENT)
2680                         return 0;
2681                 return ret;
2682         }
2683
2684         if (!di)
2685                 ret = btrfs_del_item(trans, root, &path);
2686         else
2687                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2688         BUG_ON(ret);
2689         btrfs_release_path(&path);
2690         btrfs_commit_transaction(trans, root);
2691         return ret;
2692 }
2693
2694 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2695                                     struct btrfs_root *root, u64 ino,
2696                                     u8 filetype)
2697 {
2698         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2699
2700         return insert_inode_item(trans, root, ino, 0, 0, 0, mode);
2701 }
2702
2703 static int create_inode_item(struct btrfs_root *root,
2704                              struct inode_record *rec, int root_dir)
2705 {
2706         struct btrfs_trans_handle *trans;
2707         u64 nlink = 0;
2708         u32 mode = 0;
2709         u64 size = 0;
2710         int ret;
2711
2712         trans = btrfs_start_transaction(root, 1);
2713         if (IS_ERR(trans)) {
2714                 ret = PTR_ERR(trans);
2715                 return ret;
2716         }
2717
2718         nlink = root_dir ? 1 : rec->found_link;
2719         if (rec->found_dir_item) {
2720                 if (rec->found_file_extent)
2721                         fprintf(stderr, "root %llu inode %llu has both a dir "
2722                                 "item and extents, unsure if it is a dir or a "
2723                                 "regular file so setting it as a directory\n",
2724                                 (unsigned long long)root->objectid,
2725                                 (unsigned long long)rec->ino);
2726                 mode = S_IFDIR | 0755;
2727                 size = rec->found_size;
2728         } else if (!rec->found_dir_item) {
2729                 size = rec->extent_end;
2730                 mode =  S_IFREG | 0755;
2731         }
2732
2733         ret = insert_inode_item(trans, root, rec->ino, size, rec->nbytes,
2734                                   nlink, mode);
2735         btrfs_commit_transaction(trans, root);
2736         return 0;
2737 }
2738
2739 static int repair_inode_backrefs(struct btrfs_root *root,
2740                                  struct inode_record *rec,
2741                                  struct cache_tree *inode_cache,
2742                                  int delete)
2743 {
2744         struct inode_backref *tmp, *backref;
2745         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2746         int ret = 0;
2747         int repaired = 0;
2748
2749         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2750                 if (!delete && rec->ino == root_dirid) {
2751                         if (!rec->found_inode_item) {
2752                                 ret = create_inode_item(root, rec, 1);
2753                                 if (ret)
2754                                         break;
2755                                 repaired++;
2756                         }
2757                 }
2758
2759                 /* Index 0 for root dir's are special, don't mess with it */
2760                 if (rec->ino == root_dirid && backref->index == 0)
2761                         continue;
2762
2763                 if (delete &&
2764                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2765                      (backref->found_dir_index && backref->found_inode_ref &&
2766                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2767                         ret = delete_dir_index(root, backref);
2768                         if (ret)
2769                                 break;
2770                         repaired++;
2771                         list_del(&backref->list);
2772                         free(backref);
2773                         continue;
2774                 }
2775
2776                 if (!delete && !backref->found_dir_index &&
2777                     backref->found_dir_item && backref->found_inode_ref) {
2778                         ret = add_missing_dir_index(root, inode_cache, rec,
2779                                                     backref);
2780                         if (ret)
2781                                 break;
2782                         repaired++;
2783                         if (backref->found_dir_item &&
2784                             backref->found_dir_index) {
2785                                 if (!backref->errors &&
2786                                     backref->found_inode_ref) {
2787                                         list_del(&backref->list);
2788                                         free(backref);
2789                                         continue;
2790                                 }
2791                         }
2792                 }
2793
2794                 if (!delete && (!backref->found_dir_index &&
2795                                 !backref->found_dir_item &&
2796                                 backref->found_inode_ref)) {
2797                         struct btrfs_trans_handle *trans;
2798                         struct btrfs_key location;
2799
2800                         ret = check_dir_conflict(root, backref->name,
2801                                                  backref->namelen,
2802                                                  backref->dir,
2803                                                  backref->index);
2804                         if (ret) {
2805                                 /*
2806                                  * let nlink fixing routine to handle it,
2807                                  * which can do it better.
2808                                  */
2809                                 ret = 0;
2810                                 break;
2811                         }
2812                         location.objectid = rec->ino;
2813                         location.type = BTRFS_INODE_ITEM_KEY;
2814                         location.offset = 0;
2815
2816                         trans = btrfs_start_transaction(root, 1);
2817                         if (IS_ERR(trans)) {
2818                                 ret = PTR_ERR(trans);
2819                                 break;
2820                         }
2821                         fprintf(stderr, "adding missing dir index/item pair "
2822                                 "for inode %llu\n",
2823                                 (unsigned long long)rec->ino);
2824                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2825                                                     backref->namelen,
2826                                                     backref->dir, &location,
2827                                                     imode_to_type(rec->imode),
2828                                                     backref->index);
2829                         BUG_ON(ret);
2830                         btrfs_commit_transaction(trans, root);
2831                         repaired++;
2832                 }
2833
2834                 if (!delete && (backref->found_inode_ref &&
2835                                 backref->found_dir_index &&
2836                                 backref->found_dir_item &&
2837                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2838                                 !rec->found_inode_item)) {
2839                         ret = create_inode_item(root, rec, 0);
2840                         if (ret)
2841                                 break;
2842                         repaired++;
2843                 }
2844
2845         }
2846         return ret ? ret : repaired;
2847 }
2848
2849 /*
2850  * To determine the file type for nlink/inode_item repair
2851  *
2852  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2853  * Return -ENOENT if file type is not found.
2854  */
2855 static int find_file_type(struct inode_record *rec, u8 *type)
2856 {
2857         struct inode_backref *backref;
2858
2859         /* For inode item recovered case */
2860         if (rec->found_inode_item) {
2861                 *type = imode_to_type(rec->imode);
2862                 return 0;
2863         }
2864
2865         list_for_each_entry(backref, &rec->backrefs, list) {
2866                 if (backref->found_dir_index || backref->found_dir_item) {
2867                         *type = backref->filetype;
2868                         return 0;
2869                 }
2870         }
2871         return -ENOENT;
2872 }
2873
2874 /*
2875  * To determine the file name for nlink repair
2876  *
2877  * Return 0 if file name is found, set name and namelen.
2878  * Return -ENOENT if file name is not found.
2879  */
2880 static int find_file_name(struct inode_record *rec,
2881                           char *name, int *namelen)
2882 {
2883         struct inode_backref *backref;
2884
2885         list_for_each_entry(backref, &rec->backrefs, list) {
2886                 if (backref->found_dir_index || backref->found_dir_item ||
2887                     backref->found_inode_ref) {
2888                         memcpy(name, backref->name, backref->namelen);
2889                         *namelen = backref->namelen;
2890                         return 0;
2891                 }
2892         }
2893         return -ENOENT;
2894 }
2895
2896 /* Reset the nlink of the inode to the correct one */
2897 static int reset_nlink(struct btrfs_trans_handle *trans,
2898                        struct btrfs_root *root,
2899                        struct btrfs_path *path,
2900                        struct inode_record *rec)
2901 {
2902         struct inode_backref *backref;
2903         struct inode_backref *tmp;
2904         struct btrfs_key key;
2905         struct btrfs_inode_item *inode_item;
2906         int ret = 0;
2907
2908         /* We don't believe this either, reset it and iterate backref */
2909         rec->found_link = 0;
2910
2911         /* Remove all backref including the valid ones */
2912         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2913                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
2914                                    backref->index, backref->name,
2915                                    backref->namelen, 0);
2916                 if (ret < 0)
2917                         goto out;
2918
2919                 /* remove invalid backref, so it won't be added back */
2920                 if (!(backref->found_dir_index &&
2921                       backref->found_dir_item &&
2922                       backref->found_inode_ref)) {
2923                         list_del(&backref->list);
2924                         free(backref);
2925                 } else {
2926                         rec->found_link++;
2927                 }
2928         }
2929
2930         /* Set nlink to 0 */
2931         key.objectid = rec->ino;
2932         key.type = BTRFS_INODE_ITEM_KEY;
2933         key.offset = 0;
2934         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2935         if (ret < 0)
2936                 goto out;
2937         if (ret > 0) {
2938                 ret = -ENOENT;
2939                 goto out;
2940         }
2941         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2942                                     struct btrfs_inode_item);
2943         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
2944         btrfs_mark_buffer_dirty(path->nodes[0]);
2945         btrfs_release_path(path);
2946
2947         /*
2948          * Add back valid inode_ref/dir_item/dir_index,
2949          * add_link() will handle the nlink inc, so new nlink must be correct
2950          */
2951         list_for_each_entry(backref, &rec->backrefs, list) {
2952                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
2953                                      backref->name, backref->namelen,
2954                                      backref->filetype, &backref->index, 1, 0);
2955                 if (ret < 0)
2956                         goto out;
2957         }
2958 out:
2959         btrfs_release_path(path);
2960         return ret;
2961 }
2962
2963 static int get_highest_inode(struct btrfs_trans_handle *trans,
2964                                 struct btrfs_root *root,
2965                                 struct btrfs_path *path,
2966                                 u64 *highest_ino)
2967 {
2968         struct btrfs_key key, found_key;
2969         int ret;
2970
2971         btrfs_init_path(path);
2972         key.objectid = BTRFS_LAST_FREE_OBJECTID;
2973         key.offset = -1;
2974         key.type = BTRFS_INODE_ITEM_KEY;
2975         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2976         if (ret == 1) {
2977                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2978                                 path->slots[0] - 1);
2979                 *highest_ino = found_key.objectid;
2980                 ret = 0;
2981         }
2982         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
2983                 ret = -EOVERFLOW;
2984         btrfs_release_path(path);
2985         return ret;
2986 }
2987
2988 /*
2989  * Link inode to dir 'lost+found'. Increase @ref_count.
2990  *
2991  * Returns 0 means success.
2992  * Returns <0 means failure.
2993  */
2994 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
2995                                    struct btrfs_root *root,
2996                                    struct btrfs_path *path,
2997                                    u64 ino, char *namebuf, u32 name_len,
2998                                    u8 filetype, u64 *ref_count)
2999 {
3000         char *dir_name = "lost+found";
3001         u64 lost_found_ino;
3002         int ret;
3003         u32 mode = 0700;
3004
3005         btrfs_release_path(path);
3006         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3007         if (ret < 0)
3008                 goto out;
3009         lost_found_ino++;
3010
3011         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3012                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3013                           mode);
3014         if (ret < 0) {
3015                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3016                 goto out;
3017         }
3018         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3019                              namebuf, name_len, filetype, NULL, 1, 0);
3020         /*
3021          * Add ".INO" suffix several times to handle case where
3022          * "FILENAME.INO" is already taken by another file.
3023          */
3024         while (ret == -EEXIST) {
3025                 /*
3026                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3027                  */
3028                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3029                         ret = -EFBIG;
3030                         goto out;
3031                 }
3032                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3033                          ".%llu", ino);
3034                 name_len += count_digits(ino) + 1;
3035                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3036                                      name_len, filetype, NULL, 1, 0);
3037         }
3038         if (ret < 0) {
3039                 error("failed to link the inode %llu to %s dir: %s",
3040                       ino, dir_name, strerror(-ret));
3041                 goto out;
3042         }
3043
3044         ++*ref_count;
3045         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3046                name_len, namebuf, dir_name);
3047 out:
3048         btrfs_release_path(path);
3049         if (ret)
3050                 error("failed to move file '%.*s' to '%s' dir", name_len,
3051                                 namebuf, dir_name);
3052         return ret;
3053 }
3054
3055 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3056                                struct btrfs_root *root,
3057                                struct btrfs_path *path,
3058                                struct inode_record *rec)
3059 {
3060         char namebuf[BTRFS_NAME_LEN] = {0};
3061         u8 type = 0;
3062         int namelen = 0;
3063         int name_recovered = 0;
3064         int type_recovered = 0;
3065         int ret = 0;
3066
3067         /*
3068          * Get file name and type first before these invalid inode ref
3069          * are deleted by remove_all_invalid_backref()
3070          */
3071         name_recovered = !find_file_name(rec, namebuf, &namelen);
3072         type_recovered = !find_file_type(rec, &type);
3073
3074         if (!name_recovered) {
3075                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3076                        rec->ino, rec->ino);
3077                 namelen = count_digits(rec->ino);
3078                 sprintf(namebuf, "%llu", rec->ino);
3079                 name_recovered = 1;
3080         }
3081         if (!type_recovered) {
3082                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3083                        rec->ino);
3084                 type = BTRFS_FT_REG_FILE;
3085                 type_recovered = 1;
3086         }
3087
3088         ret = reset_nlink(trans, root, path, rec);
3089         if (ret < 0) {
3090                 fprintf(stderr,
3091                         "Failed to reset nlink for inode %llu: %s\n",
3092                         rec->ino, strerror(-ret));
3093                 goto out;
3094         }
3095
3096         if (rec->found_link == 0) {
3097                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3098                                               namebuf, namelen, type,
3099                                               (u64 *)&rec->found_link);
3100                 if (ret)
3101                         goto out;
3102         }
3103         printf("Fixed the nlink of inode %llu\n", rec->ino);
3104 out:
3105         /*
3106          * Clear the flag anyway, or we will loop forever for the same inode
3107          * as it will not be removed from the bad inode list and the dead loop
3108          * happens.
3109          */
3110         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3111         btrfs_release_path(path);
3112         return ret;
3113 }
3114
3115 /*
3116  * Check if there is any normal(reg or prealloc) file extent for given
3117  * ino.
3118  * This is used to determine the file type when neither its dir_index/item or
3119  * inode_item exists.
3120  *
3121  * This will *NOT* report error, if any error happens, just consider it does
3122  * not have any normal file extent.
3123  */
3124 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3125 {
3126         struct btrfs_path path;
3127         struct btrfs_key key;
3128         struct btrfs_key found_key;
3129         struct btrfs_file_extent_item *fi;
3130         u8 type;
3131         int ret = 0;
3132
3133         btrfs_init_path(&path);
3134         key.objectid = ino;
3135         key.type = BTRFS_EXTENT_DATA_KEY;
3136         key.offset = 0;
3137
3138         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3139         if (ret < 0) {
3140                 ret = 0;
3141                 goto out;
3142         }
3143         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3144                 ret = btrfs_next_leaf(root, &path);
3145                 if (ret) {
3146                         ret = 0;
3147                         goto out;
3148                 }
3149         }
3150         while (1) {
3151                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3152                                       path.slots[0]);
3153                 if (found_key.objectid != ino ||
3154                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3155                         break;
3156                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3157                                     struct btrfs_file_extent_item);
3158                 type = btrfs_file_extent_type(path.nodes[0], fi);
3159                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3160                         ret = 1;
3161                         goto out;
3162                 }
3163         }
3164 out:
3165         btrfs_release_path(&path);
3166         return ret;
3167 }
3168
3169 static u32 btrfs_type_to_imode(u8 type)
3170 {
3171         static u32 imode_by_btrfs_type[] = {
3172                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3173                 [BTRFS_FT_DIR]          = S_IFDIR,
3174                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3175                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3176                 [BTRFS_FT_FIFO]         = S_IFIFO,
3177                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3178                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3179         };
3180
3181         return imode_by_btrfs_type[(type)];
3182 }
3183
3184 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3185                                 struct btrfs_root *root,
3186                                 struct btrfs_path *path,
3187                                 struct inode_record *rec)
3188 {
3189         u8 filetype;
3190         u32 mode = 0700;
3191         int type_recovered = 0;
3192         int ret = 0;
3193
3194         printf("Trying to rebuild inode:%llu\n", rec->ino);
3195
3196         type_recovered = !find_file_type(rec, &filetype);
3197
3198         /*
3199          * Try to determine inode type if type not found.
3200          *
3201          * For found regular file extent, it must be FILE.
3202          * For found dir_item/index, it must be DIR.
3203          *
3204          * For undetermined one, use FILE as fallback.
3205          *
3206          * TODO:
3207          * 1. If found backref(inode_index/item is already handled) to it,
3208          *    it must be DIR.
3209          *    Need new inode-inode ref structure to allow search for that.
3210          */
3211         if (!type_recovered) {
3212                 if (rec->found_file_extent &&
3213                     find_normal_file_extent(root, rec->ino)) {
3214                         type_recovered = 1;
3215                         filetype = BTRFS_FT_REG_FILE;
3216                 } else if (rec->found_dir_item) {
3217                         type_recovered = 1;
3218                         filetype = BTRFS_FT_DIR;
3219                 } else if (!list_empty(&rec->orphan_extents)) {
3220                         type_recovered = 1;
3221                         filetype = BTRFS_FT_REG_FILE;
3222                 } else{
3223                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3224                                rec->ino);
3225                         type_recovered = 1;
3226                         filetype = BTRFS_FT_REG_FILE;
3227                 }
3228         }
3229
3230         ret = btrfs_new_inode(trans, root, rec->ino,
3231                               mode | btrfs_type_to_imode(filetype));
3232         if (ret < 0)
3233                 goto out;
3234
3235         /*
3236          * Here inode rebuild is done, we only rebuild the inode item,
3237          * don't repair the nlink(like move to lost+found).
3238          * That is the job of nlink repair.
3239          *
3240          * We just fill the record and return
3241          */
3242         rec->found_dir_item = 1;
3243         rec->imode = mode | btrfs_type_to_imode(filetype);
3244         rec->nlink = 0;
3245         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3246         /* Ensure the inode_nlinks repair function will be called */
3247         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3248 out:
3249         return ret;
3250 }
3251
3252 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3253                                       struct btrfs_root *root,
3254                                       struct btrfs_path *path,
3255                                       struct inode_record *rec)
3256 {
3257         struct orphan_data_extent *orphan;
3258         struct orphan_data_extent *tmp;
3259         int ret = 0;
3260
3261         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3262                 /*
3263                  * Check for conflicting file extents
3264                  *
3265                  * Here we don't know whether the extents is compressed or not,
3266                  * so we can only assume it not compressed nor data offset,
3267                  * and use its disk_len as extent length.
3268                  */
3269                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3270                                        orphan->offset, orphan->disk_len, 0);
3271                 btrfs_release_path(path);
3272                 if (ret < 0)
3273                         goto out;
3274                 if (!ret) {
3275                         fprintf(stderr,
3276                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3277                                 orphan->disk_bytenr, orphan->disk_len);
3278                         ret = btrfs_free_extent(trans,
3279                                         root->fs_info->extent_root,
3280                                         orphan->disk_bytenr, orphan->disk_len,
3281                                         0, root->objectid, orphan->objectid,
3282                                         orphan->offset);
3283                         if (ret < 0)
3284                                 goto out;
3285                 }
3286                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3287                                 orphan->offset, orphan->disk_bytenr,
3288                                 orphan->disk_len, orphan->disk_len);
3289                 if (ret < 0)
3290                         goto out;
3291
3292                 /* Update file size info */
3293                 rec->found_size += orphan->disk_len;
3294                 if (rec->found_size == rec->nbytes)
3295                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3296
3297                 /* Update the file extent hole info too */
3298                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3299                                            orphan->disk_len);
3300                 if (ret < 0)
3301                         goto out;
3302                 if (RB_EMPTY_ROOT(&rec->holes))
3303                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3304
3305                 list_del(&orphan->list);
3306                 free(orphan);
3307         }
3308         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3309 out:
3310         return ret;
3311 }
3312
3313 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3314                                         struct btrfs_root *root,
3315                                         struct btrfs_path *path,
3316                                         struct inode_record *rec)
3317 {
3318         struct rb_node *node;
3319         struct file_extent_hole *hole;
3320         int found = 0;
3321         int ret = 0;
3322
3323         node = rb_first(&rec->holes);
3324
3325         while (node) {
3326                 found = 1;
3327                 hole = rb_entry(node, struct file_extent_hole, node);
3328                 ret = btrfs_punch_hole(trans, root, rec->ino,
3329                                        hole->start, hole->len);
3330                 if (ret < 0)
3331                         goto out;
3332                 ret = del_file_extent_hole(&rec->holes, hole->start,
3333                                            hole->len);
3334                 if (ret < 0)
3335                         goto out;
3336                 if (RB_EMPTY_ROOT(&rec->holes))
3337                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3338                 node = rb_first(&rec->holes);
3339         }
3340         /* special case for a file losing all its file extent */
3341         if (!found) {
3342                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3343                                        round_up(rec->isize,
3344                                                 root->fs_info->sectorsize));
3345                 if (ret < 0)
3346                         goto out;
3347         }
3348         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3349                rec->ino, root->objectid);
3350 out:
3351         return ret;
3352 }
3353
3354 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3355 {
3356         struct btrfs_trans_handle *trans;
3357         struct btrfs_path path;
3358         int ret = 0;
3359
3360         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3361                              I_ERR_NO_ORPHAN_ITEM |
3362                              I_ERR_LINK_COUNT_WRONG |
3363                              I_ERR_NO_INODE_ITEM |
3364                              I_ERR_FILE_EXTENT_ORPHAN |
3365                              I_ERR_FILE_EXTENT_DISCOUNT|
3366                              I_ERR_FILE_NBYTES_WRONG)))
3367                 return rec->errors;
3368
3369         /*
3370          * For nlink repair, it may create a dir and add link, so
3371          * 2 for parent(256)'s dir_index and dir_item
3372          * 2 for lost+found dir's inode_item and inode_ref
3373          * 1 for the new inode_ref of the file
3374          * 2 for lost+found dir's dir_index and dir_item for the file
3375          */
3376         trans = btrfs_start_transaction(root, 7);
3377         if (IS_ERR(trans))
3378                 return PTR_ERR(trans);
3379
3380         btrfs_init_path(&path);
3381         if (rec->errors & I_ERR_NO_INODE_ITEM)
3382                 ret = repair_inode_no_item(trans, root, &path, rec);
3383         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3384                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3385         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3386                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3387         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3388                 ret = repair_inode_isize(trans, root, &path, rec);
3389         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3390                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3391         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3392                 ret = repair_inode_nlinks(trans, root, &path, rec);
3393         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3394                 ret = repair_inode_nbytes(trans, root, &path, rec);
3395         btrfs_commit_transaction(trans, root);
3396         btrfs_release_path(&path);
3397         return ret;
3398 }
3399
3400 static int check_inode_recs(struct btrfs_root *root,
3401                             struct cache_tree *inode_cache)
3402 {
3403         struct cache_extent *cache;
3404         struct ptr_node *node;
3405         struct inode_record *rec;
3406         struct inode_backref *backref;
3407         int stage = 0;
3408         int ret = 0;
3409         int err = 0;
3410         u64 error = 0;
3411         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3412
3413         if (btrfs_root_refs(&root->root_item) == 0) {
3414                 if (!cache_tree_empty(inode_cache))
3415                         fprintf(stderr, "warning line %d\n", __LINE__);
3416                 return 0;
3417         }
3418
3419         /*
3420          * We need to repair backrefs first because we could change some of the
3421          * errors in the inode recs.
3422          *
3423          * We also need to go through and delete invalid backrefs first and then
3424          * add the correct ones second.  We do this because we may get EEXIST
3425          * when adding back the correct index because we hadn't yet deleted the
3426          * invalid index.
3427          *
3428          * For example, if we were missing a dir index then the directories
3429          * isize would be wrong, so if we fixed the isize to what we thought it
3430          * would be and then fixed the backref we'd still have a invalid fs, so
3431          * we need to add back the dir index and then check to see if the isize
3432          * is still wrong.
3433          */
3434         while (stage < 3) {
3435                 stage++;
3436                 if (stage == 3 && !err)
3437                         break;
3438
3439                 cache = search_cache_extent(inode_cache, 0);
3440                 while (repair && cache) {
3441                         node = container_of(cache, struct ptr_node, cache);
3442                         rec = node->data;
3443                         cache = next_cache_extent(cache);
3444
3445                         /* Need to free everything up and rescan */
3446                         if (stage == 3) {
3447                                 remove_cache_extent(inode_cache, &node->cache);
3448                                 free(node);
3449                                 free_inode_rec(rec);
3450                                 continue;
3451                         }
3452
3453                         if (list_empty(&rec->backrefs))
3454                                 continue;
3455
3456                         ret = repair_inode_backrefs(root, rec, inode_cache,
3457                                                     stage == 1);
3458                         if (ret < 0) {
3459                                 err = ret;
3460                                 stage = 2;
3461                                 break;
3462                         } if (ret > 0) {
3463                                 err = -EAGAIN;
3464                         }
3465                 }
3466         }
3467         if (err)
3468                 return err;
3469
3470         rec = get_inode_rec(inode_cache, root_dirid, 0);
3471         BUG_ON(IS_ERR(rec));
3472         if (rec) {
3473                 ret = check_root_dir(rec);
3474                 if (ret) {
3475                         fprintf(stderr, "root %llu root dir %llu error\n",
3476                                 (unsigned long long)root->root_key.objectid,
3477                                 (unsigned long long)root_dirid);
3478                         print_inode_error(root, rec);
3479                         error++;
3480                 }
3481         } else {
3482                 if (repair) {
3483                         struct btrfs_trans_handle *trans;
3484
3485                         trans = btrfs_start_transaction(root, 1);
3486                         if (IS_ERR(trans)) {
3487                                 err = PTR_ERR(trans);
3488                                 return err;
3489                         }
3490
3491                         fprintf(stderr,
3492                                 "root %llu missing its root dir, recreating\n",
3493                                 (unsigned long long)root->objectid);
3494
3495                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3496                         BUG_ON(ret);
3497
3498                         btrfs_commit_transaction(trans, root);
3499                         return -EAGAIN;
3500                 }
3501
3502                 fprintf(stderr, "root %llu root dir %llu not found\n",
3503                         (unsigned long long)root->root_key.objectid,
3504                         (unsigned long long)root_dirid);
3505         }
3506
3507         while (1) {
3508                 cache = search_cache_extent(inode_cache, 0);
3509                 if (!cache)
3510                         break;
3511                 node = container_of(cache, struct ptr_node, cache);
3512                 rec = node->data;
3513                 remove_cache_extent(inode_cache, &node->cache);
3514                 free(node);
3515                 if (rec->ino == root_dirid ||
3516                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3517                         free_inode_rec(rec);
3518                         continue;
3519                 }
3520
3521                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3522                         ret = check_orphan_item(root, rec->ino);
3523                         if (ret == 0)
3524                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3525                         if (can_free_inode_rec(rec)) {
3526                                 free_inode_rec(rec);
3527                                 continue;
3528                         }
3529                 }
3530
3531                 if (!rec->found_inode_item)
3532                         rec->errors |= I_ERR_NO_INODE_ITEM;
3533                 if (rec->found_link != rec->nlink)
3534                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3535                 if (repair) {
3536                         ret = try_repair_inode(root, rec);
3537                         if (ret == 0 && can_free_inode_rec(rec)) {
3538                                 free_inode_rec(rec);
3539                                 continue;
3540                         }
3541                         ret = 0;
3542                 }
3543
3544                 if (!(repair && ret == 0))
3545                         error++;
3546                 print_inode_error(root, rec);
3547                 list_for_each_entry(backref, &rec->backrefs, list) {
3548                         if (!backref->found_dir_item)
3549                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3550                         if (!backref->found_dir_index)
3551                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3552                         if (!backref->found_inode_ref)
3553                                 backref->errors |= REF_ERR_NO_INODE_REF;
3554                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3555                                 " namelen %u name %s filetype %d errors %x",
3556                                 (unsigned long long)backref->dir,
3557                                 (unsigned long long)backref->index,
3558                                 backref->namelen, backref->name,
3559                                 backref->filetype, backref->errors);
3560                         print_ref_error(backref->errors);
3561                 }
3562                 free_inode_rec(rec);
3563         }
3564         return (error > 0) ? -1 : 0;
3565 }
3566
3567 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3568                                         u64 objectid)
3569 {
3570         struct cache_extent *cache;
3571         struct root_record *rec = NULL;
3572         int ret;
3573
3574         cache = lookup_cache_extent(root_cache, objectid, 1);
3575         if (cache) {
3576                 rec = container_of(cache, struct root_record, cache);
3577         } else {
3578                 rec = calloc(1, sizeof(*rec));
3579                 if (!rec)
3580                         return ERR_PTR(-ENOMEM);
3581                 rec->objectid = objectid;
3582                 INIT_LIST_HEAD(&rec->backrefs);
3583                 rec->cache.start = objectid;
3584                 rec->cache.size = 1;
3585
3586                 ret = insert_cache_extent(root_cache, &rec->cache);
3587                 if (ret)
3588                         return ERR_PTR(-EEXIST);
3589         }
3590         return rec;
3591 }
3592
3593 static struct root_backref *get_root_backref(struct root_record *rec,
3594                                              u64 ref_root, u64 dir, u64 index,
3595                                              const char *name, int namelen)
3596 {
3597         struct root_backref *backref;
3598
3599         list_for_each_entry(backref, &rec->backrefs, list) {
3600                 if (backref->ref_root != ref_root || backref->dir != dir ||
3601                     backref->namelen != namelen)
3602                         continue;
3603                 if (memcmp(name, backref->name, namelen))
3604                         continue;
3605                 return backref;
3606         }
3607
3608         backref = calloc(1, sizeof(*backref) + namelen + 1);
3609         if (!backref)
3610                 return NULL;
3611         backref->ref_root = ref_root;
3612         backref->dir = dir;
3613         backref->index = index;
3614         backref->namelen = namelen;
3615         memcpy(backref->name, name, namelen);
3616         backref->name[namelen] = '\0';
3617         list_add_tail(&backref->list, &rec->backrefs);
3618         return backref;
3619 }
3620
3621 static void free_root_record(struct cache_extent *cache)
3622 {
3623         struct root_record *rec;
3624         struct root_backref *backref;
3625
3626         rec = container_of(cache, struct root_record, cache);
3627         while (!list_empty(&rec->backrefs)) {
3628                 backref = to_root_backref(rec->backrefs.next);
3629                 list_del(&backref->list);
3630                 free(backref);
3631         }
3632
3633         free(rec);
3634 }
3635
3636 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3637
3638 static int add_root_backref(struct cache_tree *root_cache,
3639                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3640                             const char *name, int namelen,
3641                             int item_type, int errors)
3642 {
3643         struct root_record *rec;
3644         struct root_backref *backref;
3645
3646         rec = get_root_rec(root_cache, root_id);
3647         BUG_ON(IS_ERR(rec));
3648         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3649         BUG_ON(!backref);
3650
3651         backref->errors |= errors;
3652
3653         if (item_type != BTRFS_DIR_ITEM_KEY) {
3654                 if (backref->found_dir_index || backref->found_back_ref ||
3655                     backref->found_forward_ref) {
3656                         if (backref->index != index)
3657                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3658                 } else {
3659                         backref->index = index;
3660                 }
3661         }
3662
3663         if (item_type == BTRFS_DIR_ITEM_KEY) {
3664                 if (backref->found_forward_ref)
3665                         rec->found_ref++;
3666                 backref->found_dir_item = 1;
3667         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3668                 backref->found_dir_index = 1;
3669         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3670                 if (backref->found_forward_ref)
3671                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3672                 else if (backref->found_dir_item)
3673                         rec->found_ref++;
3674                 backref->found_forward_ref = 1;
3675         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3676                 if (backref->found_back_ref)
3677                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3678                 backref->found_back_ref = 1;
3679         } else {
3680                 BUG_ON(1);
3681         }
3682
3683         if (backref->found_forward_ref && backref->found_dir_item)
3684                 backref->reachable = 1;
3685         return 0;
3686 }
3687
3688 static int merge_root_recs(struct btrfs_root *root,
3689                            struct cache_tree *src_cache,
3690                            struct cache_tree *dst_cache)
3691 {
3692         struct cache_extent *cache;
3693         struct ptr_node *node;
3694         struct inode_record *rec;
3695         struct inode_backref *backref;
3696         int ret = 0;
3697
3698         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3699                 free_inode_recs_tree(src_cache);
3700                 return 0;
3701         }
3702
3703         while (1) {
3704                 cache = search_cache_extent(src_cache, 0);
3705                 if (!cache)
3706                         break;
3707                 node = container_of(cache, struct ptr_node, cache);
3708                 rec = node->data;
3709                 remove_cache_extent(src_cache, &node->cache);
3710                 free(node);
3711
3712                 ret = is_child_root(root, root->objectid, rec->ino);
3713                 if (ret < 0)
3714                         break;
3715                 else if (ret == 0)
3716                         goto skip;
3717
3718                 list_for_each_entry(backref, &rec->backrefs, list) {
3719                         BUG_ON(backref->found_inode_ref);
3720                         if (backref->found_dir_item)
3721                                 add_root_backref(dst_cache, rec->ino,
3722                                         root->root_key.objectid, backref->dir,
3723                                         backref->index, backref->name,
3724                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3725                                         backref->errors);
3726                         if (backref->found_dir_index)
3727                                 add_root_backref(dst_cache, rec->ino,
3728                                         root->root_key.objectid, backref->dir,
3729                                         backref->index, backref->name,
3730                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3731                                         backref->errors);
3732                 }
3733 skip:
3734                 free_inode_rec(rec);
3735         }
3736         if (ret < 0)
3737                 return ret;
3738         return 0;
3739 }
3740
3741 static int check_root_refs(struct btrfs_root *root,
3742                            struct cache_tree *root_cache)
3743 {
3744         struct root_record *rec;
3745         struct root_record *ref_root;
3746         struct root_backref *backref;
3747         struct cache_extent *cache;
3748         int loop = 1;
3749         int ret;
3750         int error;
3751         int errors = 0;
3752
3753         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3754         BUG_ON(IS_ERR(rec));
3755         rec->found_ref = 1;
3756
3757         /* fixme: this can not detect circular references */
3758         while (loop) {
3759                 loop = 0;
3760                 cache = search_cache_extent(root_cache, 0);
3761                 while (1) {
3762                         if (!cache)
3763                                 break;
3764                         rec = container_of(cache, struct root_record, cache);
3765                         cache = next_cache_extent(cache);
3766
3767                         if (rec->found_ref == 0)
3768                                 continue;
3769
3770                         list_for_each_entry(backref, &rec->backrefs, list) {
3771                                 if (!backref->reachable)
3772                                         continue;
3773
3774                                 ref_root = get_root_rec(root_cache,
3775                                                         backref->ref_root);
3776                                 BUG_ON(IS_ERR(ref_root));
3777                                 if (ref_root->found_ref > 0)
3778                                         continue;
3779
3780                                 backref->reachable = 0;
3781                                 rec->found_ref--;
3782                                 if (rec->found_ref == 0)
3783                                         loop = 1;
3784                         }
3785                 }
3786         }
3787
3788         cache = search_cache_extent(root_cache, 0);
3789         while (1) {
3790                 if (!cache)
3791                         break;
3792                 rec = container_of(cache, struct root_record, cache);
3793                 cache = next_cache_extent(cache);
3794
3795                 if (rec->found_ref == 0 &&
3796                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3797                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3798                         ret = check_orphan_item(root->fs_info->tree_root,
3799                                                 rec->objectid);
3800                         if (ret == 0)
3801                                 continue;
3802
3803                         /*
3804                          * If we don't have a root item then we likely just have
3805                          * a dir item in a snapshot for this root but no actual
3806                          * ref key or anything so it's meaningless.
3807                          */
3808                         if (!rec->found_root_item)
3809                                 continue;
3810                         errors++;
3811                         fprintf(stderr, "fs tree %llu not referenced\n",
3812                                 (unsigned long long)rec->objectid);
3813                 }
3814
3815                 error = 0;
3816                 if (rec->found_ref > 0 && !rec->found_root_item)
3817                         error = 1;
3818                 list_for_each_entry(backref, &rec->backrefs, list) {
3819                         if (!backref->found_dir_item)
3820                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3821                         if (!backref->found_dir_index)
3822                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3823                         if (!backref->found_back_ref)
3824                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3825                         if (!backref->found_forward_ref)
3826                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3827                         if (backref->reachable && backref->errors)
3828                                 error = 1;
3829                 }
3830                 if (!error)
3831                         continue;
3832
3833                 errors++;
3834                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3835                         (unsigned long long)rec->objectid, rec->found_ref,
3836                          rec->found_root_item ? "" : "not found");
3837
3838                 list_for_each_entry(backref, &rec->backrefs, list) {
3839                         if (!backref->reachable)
3840                                 continue;
3841                         if (!backref->errors && rec->found_root_item)
3842                                 continue;
3843                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3844                                 " index %llu namelen %u name %s errors %x\n",
3845                                 (unsigned long long)backref->ref_root,
3846                                 (unsigned long long)backref->dir,
3847                                 (unsigned long long)backref->index,
3848                                 backref->namelen, backref->name,
3849                                 backref->errors);
3850                         print_ref_error(backref->errors);
3851                 }
3852         }
3853         return errors > 0 ? 1 : 0;
3854 }
3855
3856 static int process_root_ref(struct extent_buffer *eb, int slot,
3857                             struct btrfs_key *key,
3858                             struct cache_tree *root_cache)
3859 {
3860         u64 dirid;
3861         u64 index;
3862         u32 len;
3863         u32 name_len;
3864         struct btrfs_root_ref *ref;
3865         char namebuf[BTRFS_NAME_LEN];
3866         int error;
3867
3868         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3869
3870         dirid = btrfs_root_ref_dirid(eb, ref);
3871         index = btrfs_root_ref_sequence(eb, ref);
3872         name_len = btrfs_root_ref_name_len(eb, ref);
3873
3874         if (name_len <= BTRFS_NAME_LEN) {
3875                 len = name_len;
3876                 error = 0;
3877         } else {
3878                 len = BTRFS_NAME_LEN;
3879                 error = REF_ERR_NAME_TOO_LONG;
3880         }
3881         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3882
3883         if (key->type == BTRFS_ROOT_REF_KEY) {
3884                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3885                                  index, namebuf, len, key->type, error);
3886         } else {
3887                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3888                                  index, namebuf, len, key->type, error);
3889         }
3890         return 0;
3891 }
3892
3893 static void free_corrupt_block(struct cache_extent *cache)
3894 {
3895         struct btrfs_corrupt_block *corrupt;
3896
3897         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
3898         free(corrupt);
3899 }
3900
3901 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
3902
3903 /*
3904  * Repair the btree of the given root.
3905  *
3906  * The fix is to remove the node key in corrupt_blocks cache_tree.
3907  * and rebalance the tree.
3908  * After the fix, the btree should be writeable.
3909  */
3910 static int repair_btree(struct btrfs_root *root,
3911                         struct cache_tree *corrupt_blocks)
3912 {
3913         struct btrfs_trans_handle *trans;
3914         struct btrfs_path path;
3915         struct btrfs_corrupt_block *corrupt;
3916         struct cache_extent *cache;
3917         struct btrfs_key key;
3918         u64 offset;
3919         int level;
3920         int ret = 0;
3921
3922         if (cache_tree_empty(corrupt_blocks))
3923                 return 0;
3924
3925         trans = btrfs_start_transaction(root, 1);
3926         if (IS_ERR(trans)) {
3927                 ret = PTR_ERR(trans);
3928                 fprintf(stderr, "Error starting transaction: %s\n",
3929                         strerror(-ret));
3930                 return ret;
3931         }
3932         btrfs_init_path(&path);
3933         cache = first_cache_extent(corrupt_blocks);
3934         while (cache) {
3935                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3936                                        cache);
3937                 level = corrupt->level;
3938                 path.lowest_level = level;
3939                 key.objectid = corrupt->key.objectid;
3940                 key.type = corrupt->key.type;
3941                 key.offset = corrupt->key.offset;
3942
3943                 /*
3944                  * Here we don't want to do any tree balance, since it may
3945                  * cause a balance with corrupted brother leaf/node,
3946                  * so ins_len set to 0 here.
3947                  * Balance will be done after all corrupt node/leaf is deleted.
3948                  */
3949                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
3950                 if (ret < 0)
3951                         goto out;
3952                 offset = btrfs_node_blockptr(path.nodes[level],
3953                                              path.slots[level]);
3954
3955                 /* Remove the ptr */
3956                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
3957                 if (ret < 0)
3958                         goto out;
3959                 /*
3960                  * Remove the corresponding extent
3961                  * return value is not concerned.
3962                  */
3963                 btrfs_release_path(&path);
3964                 ret = btrfs_free_extent(trans, root, offset,
3965                                 root->fs_info->nodesize, 0,
3966                                 root->root_key.objectid, level - 1, 0);
3967                 cache = next_cache_extent(cache);
3968         }
3969
3970         /* Balance the btree using btrfs_search_slot() */
3971         cache = first_cache_extent(corrupt_blocks);
3972         while (cache) {
3973                 corrupt = container_of(cache, struct btrfs_corrupt_block,
3974                                        cache);
3975                 memcpy(&key, &corrupt->key, sizeof(key));
3976                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
3977                 if (ret < 0)
3978                         goto out;
3979                 /* return will always >0 since it won't find the item */
3980                 ret = 0;
3981                 btrfs_release_path(&path);
3982                 cache = next_cache_extent(cache);
3983         }
3984 out:
3985         btrfs_commit_transaction(trans, root);
3986         btrfs_release_path(&path);
3987         return ret;
3988 }
3989
3990 static int check_fs_root(struct btrfs_root *root,
3991                          struct cache_tree *root_cache,
3992                          struct walk_control *wc)
3993 {
3994         int ret = 0;
3995         int err = 0;
3996         int wret;
3997         int level;
3998         struct btrfs_path path;
3999         struct shared_node root_node;
4000         struct root_record *rec;
4001         struct btrfs_root_item *root_item = &root->root_item;
4002         struct cache_tree corrupt_blocks;
4003         struct orphan_data_extent *orphan;
4004         struct orphan_data_extent *tmp;
4005         enum btrfs_tree_block_status status;
4006         struct node_refs nrefs;
4007
4008         /*
4009          * Reuse the corrupt_block cache tree to record corrupted tree block
4010          *
4011          * Unlike the usage in extent tree check, here we do it in a per
4012          * fs/subvol tree base.
4013          */
4014         cache_tree_init(&corrupt_blocks);
4015         root->fs_info->corrupt_blocks = &corrupt_blocks;
4016
4017         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4018                 rec = get_root_rec(root_cache, root->root_key.objectid);
4019                 BUG_ON(IS_ERR(rec));
4020                 if (btrfs_root_refs(root_item) > 0)
4021                         rec->found_root_item = 1;
4022         }
4023
4024         btrfs_init_path(&path);
4025         memset(&root_node, 0, sizeof(root_node));
4026         cache_tree_init(&root_node.root_cache);
4027         cache_tree_init(&root_node.inode_cache);
4028         memset(&nrefs, 0, sizeof(nrefs));
4029
4030         /* Move the orphan extent record to corresponding inode_record */
4031         list_for_each_entry_safe(orphan, tmp,
4032                                  &root->orphan_data_extents, list) {
4033                 struct inode_record *inode;
4034
4035                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4036                                       1);
4037                 BUG_ON(IS_ERR(inode));
4038                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4039                 list_move(&orphan->list, &inode->orphan_extents);
4040         }
4041
4042         level = btrfs_header_level(root->node);
4043         memset(wc->nodes, 0, sizeof(wc->nodes));
4044         wc->nodes[level] = &root_node;
4045         wc->active_node = level;
4046         wc->root_level = level;
4047
4048         /* We may not have checked the root block, lets do that now */
4049         if (btrfs_is_leaf(root->node))
4050                 status = btrfs_check_leaf(root, NULL, root->node);
4051         else
4052                 status = btrfs_check_node(root, NULL, root->node);
4053         if (status != BTRFS_TREE_BLOCK_CLEAN)
4054                 return -EIO;
4055
4056         if (btrfs_root_refs(root_item) > 0 ||
4057             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4058                 path.nodes[level] = root->node;
4059                 extent_buffer_get(root->node);
4060                 path.slots[level] = 0;
4061         } else {
4062                 struct btrfs_key key;
4063                 struct btrfs_disk_key found_key;
4064
4065                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4066                 level = root_item->drop_level;
4067                 path.lowest_level = level;
4068                 if (level > btrfs_header_level(root->node) ||
4069                     level >= BTRFS_MAX_LEVEL) {
4070                         error("ignoring invalid drop level: %u", level);
4071                         goto skip_walking;
4072                 }
4073                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4074                 if (wret < 0)
4075                         goto skip_walking;
4076                 btrfs_node_key(path.nodes[level], &found_key,
4077                                 path.slots[level]);
4078                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4079                                         sizeof(found_key)));
4080         }
4081
4082         while (1) {
4083                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4084                 if (wret < 0)
4085                         ret = wret;
4086                 if (wret != 0)
4087                         break;
4088
4089                 wret = walk_up_tree(root, &path, wc, &level);
4090                 if (wret < 0)
4091                         ret = wret;
4092                 if (wret != 0)
4093                         break;
4094         }
4095 skip_walking:
4096         btrfs_release_path(&path);
4097
4098         if (!cache_tree_empty(&corrupt_blocks)) {
4099                 struct cache_extent *cache;
4100                 struct btrfs_corrupt_block *corrupt;
4101
4102                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4103                        root->root_key.objectid);
4104                 cache = first_cache_extent(&corrupt_blocks);
4105                 while (cache) {
4106                         corrupt = container_of(cache,
4107                                                struct btrfs_corrupt_block,
4108                                                cache);
4109                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4110                                cache->start, corrupt->level,
4111                                corrupt->key.objectid, corrupt->key.type,
4112                                corrupt->key.offset);
4113                         cache = next_cache_extent(cache);
4114                 }
4115                 if (repair) {
4116                         printf("Try to repair the btree for root %llu\n",
4117                                root->root_key.objectid);
4118                         ret = repair_btree(root, &corrupt_blocks);
4119                         if (ret < 0)
4120                                 fprintf(stderr, "Failed to repair btree: %s\n",
4121                                         strerror(-ret));
4122                         if (!ret)
4123                                 printf("Btree for root %llu is fixed\n",
4124                                        root->root_key.objectid);
4125                 }
4126         }
4127
4128         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4129         if (err < 0)
4130                 ret = err;
4131
4132         if (root_node.current) {
4133                 root_node.current->checked = 1;
4134                 maybe_free_inode_rec(&root_node.inode_cache,
4135                                 root_node.current);
4136         }
4137
4138         err = check_inode_recs(root, &root_node.inode_cache);
4139         if (!ret)
4140                 ret = err;
4141
4142         free_corrupt_blocks_tree(&corrupt_blocks);
4143         root->fs_info->corrupt_blocks = NULL;
4144         free_orphan_data_extents(&root->orphan_data_extents);
4145         return ret;
4146 }
4147
4148 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4149                           struct cache_tree *root_cache)
4150 {
4151         struct btrfs_path path;
4152         struct btrfs_key key;
4153         struct walk_control wc;
4154         struct extent_buffer *leaf, *tree_node;
4155         struct btrfs_root *tmp_root;
4156         struct btrfs_root *tree_root = fs_info->tree_root;
4157         int ret;
4158         int err = 0;
4159
4160         if (ctx.progress_enabled) {
4161                 ctx.tp = TASK_FS_ROOTS;
4162                 task_start(ctx.info);
4163         }
4164
4165         /*
4166          * Just in case we made any changes to the extent tree that weren't
4167          * reflected into the free space cache yet.
4168          */
4169         if (repair)
4170                 reset_cached_block_groups(fs_info);
4171         memset(&wc, 0, sizeof(wc));
4172         cache_tree_init(&wc.shared);
4173         btrfs_init_path(&path);
4174
4175 again:
4176         key.offset = 0;
4177         key.objectid = 0;
4178         key.type = BTRFS_ROOT_ITEM_KEY;
4179         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4180         if (ret < 0) {
4181                 err = 1;
4182                 goto out;
4183         }
4184         tree_node = tree_root->node;
4185         while (1) {
4186                 if (tree_node != tree_root->node) {
4187                         free_root_recs_tree(root_cache);
4188                         btrfs_release_path(&path);
4189                         goto again;
4190                 }
4191                 leaf = path.nodes[0];
4192                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4193                         ret = btrfs_next_leaf(tree_root, &path);
4194                         if (ret) {
4195                                 if (ret < 0)
4196                                         err = 1;
4197                                 break;
4198                         }
4199                         leaf = path.nodes[0];
4200                 }
4201                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4202                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4203                     fs_root_objectid(key.objectid)) {
4204                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4205                                 tmp_root = btrfs_read_fs_root_no_cache(
4206                                                 fs_info, &key);
4207                         } else {
4208                                 key.offset = (u64)-1;
4209                                 tmp_root = btrfs_read_fs_root(
4210                                                 fs_info, &key);
4211                         }
4212                         if (IS_ERR(tmp_root)) {
4213                                 err = 1;
4214                                 goto next;
4215                         }
4216                         ret = check_fs_root(tmp_root, root_cache, &wc);
4217                         if (ret == -EAGAIN) {
4218                                 free_root_recs_tree(root_cache);
4219                                 btrfs_release_path(&path);
4220                                 goto again;
4221                         }
4222                         if (ret)
4223                                 err = 1;
4224                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4225                                 btrfs_free_fs_root(tmp_root);
4226                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4227                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4228                         process_root_ref(leaf, path.slots[0], &key,
4229                                          root_cache);
4230                 }
4231 next:
4232                 path.slots[0]++;
4233         }
4234 out:
4235         btrfs_release_path(&path);
4236         if (err)
4237                 free_extent_cache_tree(&wc.shared);
4238         if (!cache_tree_empty(&wc.shared))
4239                 fprintf(stderr, "warning line %d\n", __LINE__);
4240
4241         task_stop(ctx.info);
4242
4243         return err;
4244 }
4245
4246 /*
4247  * Find the @index according by @ino and name.
4248  * Notice:time efficiency is O(N)
4249  *
4250  * @root:       the root of the fs/file tree
4251  * @index_ret:  the index as return value
4252  * @namebuf:    the name to match
4253  * @name_len:   the length of name to match
4254  * @file_type:  the file_type of INODE_ITEM to match
4255  *
4256  * Returns 0 if found and *@index_ret will be modified with right value
4257  * Returns< 0 not found and *@index_ret will be (u64)-1
4258  */
4259 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4260                           u64 *index_ret, char *namebuf, u32 name_len,
4261                           u8 file_type)
4262 {
4263         struct btrfs_path path;
4264         struct extent_buffer *node;
4265         struct btrfs_dir_item *di;
4266         struct btrfs_key key;
4267         struct btrfs_key location;
4268         char name[BTRFS_NAME_LEN] = {0};
4269
4270         u32 total;
4271         u32 cur = 0;
4272         u32 len;
4273         u32 data_len;
4274         u8 filetype;
4275         int slot;
4276         int ret;
4277
4278         ASSERT(index_ret);
4279
4280         /* search from the last index */
4281         key.objectid = dirid;
4282         key.offset = (u64)-1;
4283         key.type = BTRFS_DIR_INDEX_KEY;
4284
4285         btrfs_init_path(&path);
4286         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4287         if (ret < 0)
4288                 return ret;
4289
4290 loop:
4291         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4292         if (ret) {
4293                 ret = -ENOENT;
4294                 *index_ret = (64)-1;
4295                 goto out;
4296         }
4297         /* Check whether inode_id/filetype/name match */
4298         node = path.nodes[0];
4299         slot = path.slots[0];
4300         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4301         total = btrfs_item_size_nr(node, slot);
4302         while (cur < total) {
4303                 ret = -ENOENT;
4304                 len = btrfs_dir_name_len(node, di);
4305                 data_len = btrfs_dir_data_len(node, di);
4306
4307                 btrfs_dir_item_key_to_cpu(node, di, &location);
4308                 if (location.objectid != location_id ||
4309                     location.type != BTRFS_INODE_ITEM_KEY ||
4310                     location.offset != 0)
4311                         goto next;
4312
4313                 filetype = btrfs_dir_type(node, di);
4314                 if (file_type != filetype)
4315                         goto next;
4316
4317                 if (len > BTRFS_NAME_LEN)
4318                         len = BTRFS_NAME_LEN;
4319
4320                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4321                 if (len != name_len || strncmp(namebuf, name, len))
4322                         goto next;
4323
4324                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4325                 *index_ret = key.offset;
4326                 ret = 0;
4327                 goto out;
4328 next:
4329                 len += sizeof(*di) + data_len;
4330                 di = (struct btrfs_dir_item *)((char *)di + len);
4331                 cur += len;
4332         }
4333         goto loop;
4334
4335 out:
4336         btrfs_release_path(&path);
4337         return ret;
4338 }
4339
4340 /*
4341  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4342  * INODE_REF/INODE_EXTREF match.
4343  *
4344  * @root:       the root of the fs/file tree
4345  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4346  *              value while find index
4347  * @location_key: location key of the struct btrfs_dir_item to match
4348  * @name:       the name to match
4349  * @namelen:    the length of name
4350  * @file_type:  the type of file to math
4351  *
4352  * Return 0 if no error occurred.
4353  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4354  * DIR_ITEM/DIR_INDEX
4355  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4356  * and DIR_ITEM/DIR_INDEX mismatch
4357  */
4358 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4359                          struct btrfs_key *location_key, char *name,
4360                          u32 namelen, u8 file_type)
4361 {
4362         struct btrfs_path path;
4363         struct extent_buffer *node;
4364         struct btrfs_dir_item *di;
4365         struct btrfs_key location;
4366         char namebuf[BTRFS_NAME_LEN] = {0};
4367         u32 total;
4368         u32 cur = 0;
4369         u32 len;
4370         u32 data_len;
4371         u8 filetype;
4372         int slot;
4373         int ret;
4374
4375         /* get the index by traversing all index */
4376         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4377                 ret = find_dir_index(root, key->objectid,
4378                                      location_key->objectid, &key->offset,
4379                                      name, namelen, file_type);
4380                 if (ret)
4381                         ret = DIR_INDEX_MISSING;
4382                 return ret;
4383         }
4384
4385         btrfs_init_path(&path);
4386         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4387         if (ret) {
4388                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4389                         DIR_INDEX_MISSING;
4390                 goto out;
4391         }
4392
4393         /* Check whether inode_id/filetype/name match */
4394         node = path.nodes[0];
4395         slot = path.slots[0];
4396         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4397         total = btrfs_item_size_nr(node, slot);
4398         while (cur < total) {
4399                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4400                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4401
4402                 len = btrfs_dir_name_len(node, di);
4403                 data_len = btrfs_dir_data_len(node, di);
4404
4405                 btrfs_dir_item_key_to_cpu(node, di, &location);
4406                 if (location.objectid != location_key->objectid ||
4407                     location.type != location_key->type ||
4408                     location.offset != location_key->offset)
4409                         goto next;
4410
4411                 filetype = btrfs_dir_type(node, di);
4412                 if (file_type != filetype)
4413                         goto next;
4414
4415                 if (len > BTRFS_NAME_LEN) {
4416                         len = BTRFS_NAME_LEN;
4417                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4418                         root->objectid,
4419                         key->type == BTRFS_DIR_ITEM_KEY ?
4420                         "DIR_ITEM" : "DIR_INDEX",
4421                         key->objectid, key->offset, len);
4422                 }
4423                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4424                                    len);
4425                 if (len != namelen || strncmp(namebuf, name, len))
4426                         goto next;
4427
4428                 ret = 0;
4429                 goto out;
4430 next:
4431                 len += sizeof(*di) + data_len;
4432                 di = (struct btrfs_dir_item *)((char *)di + len);
4433                 cur += len;
4434         }
4435
4436 out:
4437         btrfs_release_path(&path);
4438         return ret;
4439 }
4440
4441 /*
4442  * Prints inode ref error message
4443  */
4444 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4445                                 u64 index, const char *namebuf, int name_len,
4446                                 u8 filetype, int err)
4447 {
4448         if (!err)
4449                 return;
4450
4451         /* root dir error */
4452         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4453                 error(
4454         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4455                       root->objectid, key->objectid, key->offset, namebuf);
4456                 return;
4457         }
4458
4459         /* normal error */
4460         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4461                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4462                       root->objectid, key->offset,
4463                       btrfs_name_hash(namebuf, name_len),
4464                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4465                       namebuf, filetype);
4466         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4467                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4468                       root->objectid, key->offset, index,
4469                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4470                       namebuf, filetype);
4471 }
4472
4473 /*
4474  * Insert the missing inode item.
4475  *
4476  * Returns 0 means success.
4477  * Returns <0 means error.
4478  */
4479 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4480                                      u8 filetype)
4481 {
4482         struct btrfs_key key;
4483         struct btrfs_trans_handle *trans;
4484         struct btrfs_path path;
4485         int ret;
4486
4487         key.objectid = ino;
4488         key.type = BTRFS_INODE_ITEM_KEY;
4489         key.offset = 0;
4490
4491         btrfs_init_path(&path);
4492         trans = btrfs_start_transaction(root, 1);
4493         if (IS_ERR(trans)) {
4494                 ret = -EIO;
4495                 goto out;
4496         }
4497
4498         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4499         if (ret < 0 || !ret)
4500                 goto fail;
4501
4502         /* insert inode item */
4503         create_inode_item_lowmem(trans, root, ino, filetype);
4504         ret = 0;
4505 fail:
4506         btrfs_commit_transaction(trans, root);
4507 out:
4508         if (ret)
4509                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4510                       root->objectid, ino);
4511         btrfs_release_path(&path);
4512         return ret;
4513 }
4514
4515 /*
4516  * The ternary means dir item, dir index and relative inode ref.
4517  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4518  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4519  * strategy:
4520  * If two of three is missing or mismatched, delete the existing one.
4521  * If one of three is missing or mismatched, add the missing one.
4522  *
4523  * returns 0 means success.
4524  * returns not 0 means on error;
4525  */
4526 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4527                           u64 index, char *name, int name_len, u8 filetype,
4528                           int err)
4529 {
4530         struct btrfs_trans_handle *trans;
4531         int stage = 0;
4532         int ret = 0;
4533
4534         /*
4535          * stage shall be one of following valild values:
4536          *      0: Fine, nothing to do.
4537          *      1: One of three is wrong, so add missing one.
4538          *      2: Two of three is wrong, so delete existed one.
4539          */
4540         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4541                 stage++;
4542         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4543                 stage++;
4544         if (err & (INODE_REF_MISSING))
4545                 stage++;
4546
4547         /* stage must be smllarer than 3 */
4548         ASSERT(stage < 3);
4549
4550         trans = btrfs_start_transaction(root, 1);
4551         if (stage == 2) {
4552                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4553                                    name_len, 0);
4554                 goto out;
4555         }
4556         if (stage == 1) {
4557                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4558                                filetype, &index, 1, 1);
4559                 goto out;
4560         }
4561 out:
4562         btrfs_commit_transaction(trans, root);
4563
4564         if (ret)
4565                 error("fail to repair inode %llu name %s filetype %u",
4566                       ino, name, filetype);
4567         else
4568                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4569                        stage == 2 ? "Delete" : "Add",
4570                        ino, name, filetype);
4571
4572         return ret;
4573 }
4574
4575 /*
4576  * Traverse the given INODE_REF and call find_dir_item() to find related
4577  * DIR_ITEM/DIR_INDEX.
4578  *
4579  * @root:       the root of the fs/file tree
4580  * @ref_key:    the key of the INODE_REF
4581  * @path        the path provides node and slot
4582  * @refs:       the count of INODE_REF
4583  * @mode:       the st_mode of INODE_ITEM
4584  * @name_ret:   returns with the first ref's name
4585  * @name_len_ret:    len of the name_ret
4586  *
4587  * Return 0 if no error occurred.
4588  */
4589 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4590                            struct btrfs_path *path, char *name_ret,
4591                            u32 *namelen_ret, u64 *refs_ret, int mode)
4592 {
4593         struct btrfs_key key;
4594         struct btrfs_key location;
4595         struct btrfs_inode_ref *ref;
4596         struct extent_buffer *node;
4597         char namebuf[BTRFS_NAME_LEN] = {0};
4598         u32 total;
4599         u32 cur = 0;
4600         u32 len;
4601         u32 name_len;
4602         u64 index;
4603         int ret;
4604         int err = 0;
4605         int tmp_err;
4606         int slot;
4607         int need_research = 0;
4608         u64 refs;
4609
4610 begin:
4611         err = 0;
4612         cur = 0;
4613         refs = *refs_ret;
4614
4615         /* since after repair, path and the dir item may be changed */
4616         if (need_research) {
4617                 need_research = 0;
4618                 btrfs_release_path(path);
4619                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4620                 /* the item was deleted, let path point to the last checked item */
4621                 if (ret > 0) {
4622                         if (path->slots[0] == 0)
4623                                 btrfs_prev_leaf(root, path);
4624                         else
4625                                 path->slots[0]--;
4626                 }
4627                 if (ret)
4628                         goto out;
4629         }
4630
4631         location.objectid = ref_key->objectid;
4632         location.type = BTRFS_INODE_ITEM_KEY;
4633         location.offset = 0;
4634         node = path->nodes[0];
4635         slot = path->slots[0];
4636
4637         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4638         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4639         total = btrfs_item_size_nr(node, slot);
4640
4641 next:
4642         /* Update inode ref count */
4643         refs++;
4644         tmp_err = 0;
4645         index = btrfs_inode_ref_index(node, ref);
4646         name_len = btrfs_inode_ref_name_len(node, ref);
4647
4648         if (name_len <= BTRFS_NAME_LEN) {
4649                 len = name_len;
4650         } else {
4651                 len = BTRFS_NAME_LEN;
4652                 warning("root %llu INODE_REF[%llu %llu] name too long",
4653                         root->objectid, ref_key->objectid, ref_key->offset);
4654         }
4655
4656         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4657
4658         /* copy the first name found to name_ret */
4659         if (refs == 1 && name_ret) {
4660                 memcpy(name_ret, namebuf, len);
4661                 *namelen_ret = len;
4662         }
4663
4664         /* Check root dir ref */
4665         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4666                 if (index != 0 || len != strlen("..") ||
4667                     strncmp("..", namebuf, len) ||
4668                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4669                         /* set err bits then repair will delete the ref */
4670                         err |= DIR_INDEX_MISSING;
4671                         err |= DIR_ITEM_MISSING;
4672                 }
4673                 goto end;
4674         }
4675
4676         /* Find related DIR_INDEX */
4677         key.objectid = ref_key->offset;
4678         key.type = BTRFS_DIR_INDEX_KEY;
4679         key.offset = index;
4680         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4681                             imode_to_type(mode));
4682
4683         /* Find related dir_item */
4684         key.objectid = ref_key->offset;
4685         key.type = BTRFS_DIR_ITEM_KEY;
4686         key.offset = btrfs_name_hash(namebuf, len);
4687         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4688                             imode_to_type(mode));
4689 end:
4690         if (tmp_err && repair) {
4691                 ret = repair_ternary_lowmem(root, ref_key->offset,
4692                                             ref_key->objectid, index, namebuf,
4693                                             name_len, imode_to_type(mode),
4694                                             tmp_err);
4695                 if (!ret) {
4696                         need_research = 1;
4697                         goto begin;
4698                 }
4699         }
4700         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4701                             imode_to_type(mode), tmp_err);
4702         err |= tmp_err;
4703         len = sizeof(*ref) + name_len;
4704         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4705         cur += len;
4706         if (cur < total)
4707                 goto next;
4708
4709 out:
4710         *refs_ret = refs;
4711         return err;
4712 }
4713
4714 /*
4715  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4716  * DIR_ITEM/DIR_INDEX.
4717  *
4718  * @root:       the root of the fs/file tree
4719  * @ref_key:    the key of the INODE_EXTREF
4720  * @refs:       the count of INODE_EXTREF
4721  * @mode:       the st_mode of INODE_ITEM
4722  *
4723  * Return 0 if no error occurred.
4724  */
4725 static int check_inode_extref(struct btrfs_root *root,
4726                               struct btrfs_key *ref_key,
4727                               struct extent_buffer *node, int slot, u64 *refs,
4728                               int mode)
4729 {
4730         struct btrfs_key key;
4731         struct btrfs_key location;
4732         struct btrfs_inode_extref *extref;
4733         char namebuf[BTRFS_NAME_LEN] = {0};
4734         u32 total;
4735         u32 cur = 0;
4736         u32 len;
4737         u32 name_len;
4738         u64 index;
4739         u64 parent;
4740         int ret;
4741         int err = 0;
4742
4743         location.objectid = ref_key->objectid;
4744         location.type = BTRFS_INODE_ITEM_KEY;
4745         location.offset = 0;
4746
4747         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4748         total = btrfs_item_size_nr(node, slot);
4749
4750 next:
4751         /* update inode ref count */
4752         (*refs)++;
4753         name_len = btrfs_inode_extref_name_len(node, extref);
4754         index = btrfs_inode_extref_index(node, extref);
4755         parent = btrfs_inode_extref_parent(node, extref);
4756         if (name_len <= BTRFS_NAME_LEN) {
4757                 len = name_len;
4758         } else {
4759                 len = BTRFS_NAME_LEN;
4760                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4761                         root->objectid, ref_key->objectid, ref_key->offset);
4762         }
4763         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4764
4765         /* Check root dir ref name */
4766         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4767                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4768                       root->objectid, ref_key->objectid, ref_key->offset,
4769                       namebuf);
4770                 err |= ROOT_DIR_ERROR;
4771         }
4772
4773         /* find related dir_index */
4774         key.objectid = parent;
4775         key.type = BTRFS_DIR_INDEX_KEY;
4776         key.offset = index;
4777         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4778         err |= ret;
4779
4780         /* find related dir_item */
4781         key.objectid = parent;
4782         key.type = BTRFS_DIR_ITEM_KEY;
4783         key.offset = btrfs_name_hash(namebuf, len);
4784         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4785         err |= ret;
4786
4787         len = sizeof(*extref) + name_len;
4788         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4789         cur += len;
4790
4791         if (cur < total)
4792                 goto next;
4793
4794         return err;
4795 }
4796
4797 /*
4798  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4799  * DIR_ITEM/DIR_INDEX match.
4800  * Return with @index_ret.
4801  *
4802  * @root:       the root of the fs/file tree
4803  * @key:        the key of the INODE_REF/INODE_EXTREF
4804  * @name:       the name in the INODE_REF/INODE_EXTREF
4805  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4806  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4807  *              value (64)-1 means do not check index
4808  * @ext_ref:    the EXTENDED_IREF feature
4809  *
4810  * Return 0 if no error occurred.
4811  * Return >0 for error bitmap
4812  */
4813 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4814                           char *name, int namelen, u64 *index_ret,
4815                           unsigned int ext_ref)
4816 {
4817         struct btrfs_path path;
4818         struct btrfs_inode_ref *ref;
4819         struct btrfs_inode_extref *extref;
4820         struct extent_buffer *node;
4821         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4822         u32 total;
4823         u32 cur = 0;
4824         u32 len;
4825         u32 ref_namelen;
4826         u64 ref_index;
4827         u64 parent;
4828         u64 dir_id;
4829         int slot;
4830         int ret;
4831
4832         ASSERT(index_ret);
4833
4834         btrfs_init_path(&path);
4835         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4836         if (ret) {
4837                 ret = INODE_REF_MISSING;
4838                 goto extref;
4839         }
4840
4841         node = path.nodes[0];
4842         slot = path.slots[0];
4843
4844         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4845         total = btrfs_item_size_nr(node, slot);
4846
4847         /* Iterate all entry of INODE_REF */
4848         while (cur < total) {
4849                 ret = INODE_REF_MISSING;
4850
4851                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4852                 ref_index = btrfs_inode_ref_index(node, ref);
4853                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4854                         goto next_ref;
4855
4856                 if (cur + sizeof(*ref) + ref_namelen > total ||
4857                     ref_namelen > BTRFS_NAME_LEN) {
4858                         warning("root %llu INODE %s[%llu %llu] name too long",
4859                                 root->objectid,
4860                                 key->type == BTRFS_INODE_REF_KEY ?
4861                                         "REF" : "EXTREF",
4862                                 key->objectid, key->offset);
4863
4864                         if (cur + sizeof(*ref) > total)
4865                                 break;
4866                         len = min_t(u32, total - cur - sizeof(*ref),
4867                                     BTRFS_NAME_LEN);
4868                 } else {
4869                         len = ref_namelen;
4870                 }
4871
4872                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4873                                    len);
4874
4875                 if (len != namelen || strncmp(ref_namebuf, name, len))
4876                         goto next_ref;
4877
4878                 *index_ret = ref_index;
4879                 ret = 0;
4880                 goto out;
4881 next_ref:
4882                 len = sizeof(*ref) + ref_namelen;
4883                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4884                 cur += len;
4885         }
4886
4887 extref:
4888         /* Skip if not support EXTENDED_IREF feature */
4889         if (!ext_ref)
4890                 goto out;
4891
4892         btrfs_release_path(&path);
4893         btrfs_init_path(&path);
4894
4895         dir_id = key->offset;
4896         key->type = BTRFS_INODE_EXTREF_KEY;
4897         key->offset = btrfs_extref_hash(dir_id, name, namelen);
4898
4899         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4900         if (ret) {
4901                 ret = INODE_REF_MISSING;
4902                 goto out;
4903         }
4904
4905         node = path.nodes[0];
4906         slot = path.slots[0];
4907
4908         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4909         cur = 0;
4910         total = btrfs_item_size_nr(node, slot);
4911
4912         /* Iterate all entry of INODE_EXTREF */
4913         while (cur < total) {
4914                 ret = INODE_REF_MISSING;
4915
4916                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
4917                 ref_index = btrfs_inode_extref_index(node, extref);
4918                 parent = btrfs_inode_extref_parent(node, extref);
4919                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4920                         goto next_extref;
4921
4922                 if (parent != dir_id)
4923                         goto next_extref;
4924
4925                 if (ref_namelen <= BTRFS_NAME_LEN) {
4926                         len = ref_namelen;
4927                 } else {
4928                         len = BTRFS_NAME_LEN;
4929                         warning("root %llu INODE %s[%llu %llu] name too long",
4930                                 root->objectid,
4931                                 key->type == BTRFS_INODE_REF_KEY ?
4932                                         "REF" : "EXTREF",
4933                                 key->objectid, key->offset);
4934                 }
4935                 read_extent_buffer(node, ref_namebuf,
4936                                    (unsigned long)(extref + 1), len);
4937
4938                 if (len != namelen || strncmp(ref_namebuf, name, len))
4939                         goto next_extref;
4940
4941                 *index_ret = ref_index;
4942                 ret = 0;
4943                 goto out;
4944
4945 next_extref:
4946                 len = sizeof(*extref) + ref_namelen;
4947                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
4948                 cur += len;
4949
4950         }
4951 out:
4952         btrfs_release_path(&path);
4953         return ret;
4954 }
4955
4956 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
4957                                u64 ino, u64 index, const char *namebuf,
4958                                int name_len, u8 filetype, int err)
4959 {
4960         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
4961                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
4962                       root->objectid, key->objectid, key->offset, namebuf,
4963                       filetype,
4964                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4965         }
4966
4967         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
4968                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
4969                       root->objectid, key->objectid, index, namebuf, filetype,
4970                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
4971         }
4972
4973         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
4974                 error(
4975                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
4976                       root->objectid, ino, index, namebuf, filetype,
4977                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
4978         }
4979
4980         if (err & INODE_REF_MISSING)
4981                 error(
4982                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
4983                       root->objectid, ino, key->objectid, namebuf, filetype);
4984
4985 }
4986
4987 /*
4988  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
4989  *
4990  * Returns error after repair
4991  */
4992 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
4993                            u64 index, u8 filetype, char *namebuf, u32 name_len,
4994                            int err)
4995 {
4996         int ret;
4997
4998         if (err & INODE_ITEM_MISSING) {
4999                 ret = repair_inode_item_missing(root, ino, filetype);
5000                 if (!ret)
5001                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5002         }
5003
5004         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5005                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5006                                             name_len, filetype, err);
5007                 if (!ret) {
5008                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5009                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5010                         err &= ~(INODE_REF_MISSING);
5011                 }
5012         }
5013         return err;
5014 }
5015
5016 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5017                 u64 *size_ret)
5018 {
5019         struct btrfs_key key;
5020         struct btrfs_path path;
5021         u32 len;
5022         struct btrfs_dir_item *di;
5023         int ret;
5024         int cur = 0;
5025         int total = 0;
5026
5027         ASSERT(size_ret);
5028         *size_ret = 0;
5029
5030         key.objectid = ino;
5031         key.type = type;
5032         key.offset = (u64)-1;
5033
5034         btrfs_init_path(&path);
5035         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5036         if (ret < 0) {
5037                 ret = -EIO;
5038                 goto out;
5039         }
5040         /* if found, go to spacial case */
5041         if (ret == 0)
5042                 goto special_case;
5043
5044 loop:
5045         ret = btrfs_previous_item(root, &path, ino, type);
5046
5047         if (ret) {
5048                 ret = 0;
5049                 goto out;
5050         }
5051
5052 special_case:
5053         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5054         cur = 0;
5055         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5056
5057         while (cur < total) {
5058                 len = btrfs_dir_name_len(path.nodes[0], di);
5059                 if (len > BTRFS_NAME_LEN)
5060                         len = BTRFS_NAME_LEN;
5061                 *size_ret += len;
5062
5063                 len += btrfs_dir_data_len(path.nodes[0], di);
5064                 len += sizeof(*di);
5065                 di = (struct btrfs_dir_item *)((char *)di + len);
5066                 cur += len;
5067         }
5068         goto loop;
5069
5070 out:
5071         btrfs_release_path(&path);
5072         return ret;
5073 }
5074
5075 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5076 {
5077         u64 item_size;
5078         u64 index_size;
5079         int ret;
5080
5081         ASSERT(size);
5082         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5083         if (ret)
5084                 goto out;
5085
5086         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5087         if (ret)
5088                 goto out;
5089
5090         *size = item_size + index_size;
5091
5092 out:
5093         if (ret)
5094                 error("failed to count root %llu INODE[%llu] root size",
5095                       root->objectid, ino);
5096         return ret;
5097 }
5098
5099 /*
5100  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5101  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5102  *
5103  * @root:       the root of the fs/file tree
5104  * @key:        the key of the INODE_REF/INODE_EXTREF
5105  * @path:       the path
5106  * @size:       the st_size of the INODE_ITEM
5107  * @ext_ref:    the EXTENDED_IREF feature
5108  *
5109  * Return 0 if no error occurred.
5110  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5111  */
5112 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5113                           struct btrfs_path *path, u64 *size,
5114                           unsigned int ext_ref)
5115 {
5116         struct btrfs_dir_item *di;
5117         struct btrfs_inode_item *ii;
5118         struct btrfs_key key;
5119         struct btrfs_key location;
5120         struct extent_buffer *node;
5121         int slot;
5122         char namebuf[BTRFS_NAME_LEN] = {0};
5123         u32 total;
5124         u32 cur = 0;
5125         u32 len;
5126         u32 name_len;
5127         u32 data_len;
5128         u8 filetype;
5129         u32 mode = 0;
5130         u64 index;
5131         int ret;
5132         int err;
5133         int tmp_err;
5134         int need_research = 0;
5135
5136         /*
5137          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5138          * ignore index check.
5139          */
5140         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5141                 index = di_key->offset;
5142         else
5143                 index = (u64)-1;
5144 begin:
5145         err = 0;
5146         cur = 0;
5147
5148         /* since after repair, path and the dir item may be changed */
5149         if (need_research) {
5150                 need_research = 0;
5151                 err |= DIR_COUNT_AGAIN;
5152                 btrfs_release_path(path);
5153                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5154                 /* the item was deleted, let path point the last checked item */
5155                 if (ret > 0) {
5156                         if (path->slots[0] == 0)
5157                                 btrfs_prev_leaf(root, path);
5158                         else
5159                                 path->slots[0]--;
5160                 }
5161                 if (ret)
5162                         goto out;
5163         }
5164
5165         node = path->nodes[0];
5166         slot = path->slots[0];
5167
5168         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5169         total = btrfs_item_size_nr(node, slot);
5170         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5171
5172         while (cur < total) {
5173                 data_len = btrfs_dir_data_len(node, di);
5174                 tmp_err = 0;
5175                 if (data_len)
5176                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5177                               root->objectid,
5178               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5179                               di_key->objectid, di_key->offset, data_len);
5180
5181                 name_len = btrfs_dir_name_len(node, di);
5182                 if (name_len <= BTRFS_NAME_LEN) {
5183                         len = name_len;
5184                 } else {
5185                         len = BTRFS_NAME_LEN;
5186                         warning("root %llu %s[%llu %llu] name too long",
5187                                 root->objectid,
5188                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5189                                 di_key->objectid, di_key->offset);
5190                 }
5191                 (*size) += name_len;
5192                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5193                                    len);
5194                 filetype = btrfs_dir_type(node, di);
5195
5196                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5197                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5198                         err |= -EIO;
5199                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5200                         root->objectid, di_key->objectid, di_key->offset,
5201                         namebuf, len, filetype, di_key->offset,
5202                         btrfs_name_hash(namebuf, len));
5203                 }
5204
5205                 btrfs_dir_item_key_to_cpu(node, di, &location);
5206                 /* Ignore related ROOT_ITEM check */
5207                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5208                         goto next;
5209
5210                 btrfs_release_path(path);
5211                 /* Check relative INODE_ITEM(existence/filetype) */
5212                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5213                 if (ret) {
5214                         tmp_err |= INODE_ITEM_MISSING;
5215                         goto next;
5216                 }
5217
5218                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5219                                     struct btrfs_inode_item);
5220                 mode = btrfs_inode_mode(path->nodes[0], ii);
5221                 if (imode_to_type(mode) != filetype) {
5222                         tmp_err |= INODE_ITEM_MISMATCH;
5223                         goto next;
5224                 }
5225
5226                 /* Check relative INODE_REF/INODE_EXTREF */
5227                 key.objectid = location.objectid;
5228                 key.type = BTRFS_INODE_REF_KEY;
5229                 key.offset = di_key->objectid;
5230                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5231                                           &index, ext_ref);
5232
5233                 /* check relative INDEX/ITEM */
5234                 key.objectid = di_key->objectid;
5235                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5236                         key.type = BTRFS_DIR_INDEX_KEY;
5237                         key.offset = index;
5238                 } else {
5239                         key.type = BTRFS_DIR_ITEM_KEY;
5240                         key.offset = btrfs_name_hash(namebuf, name_len);
5241                 }
5242
5243                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5244                                          name_len, filetype);
5245                 /* find_dir_item may find index */
5246                 if (key.type == BTRFS_DIR_INDEX_KEY)
5247                         index = key.offset;
5248 next:
5249
5250                 if (tmp_err && repair) {
5251                         ret = repair_dir_item(root, di_key->objectid,
5252                                               location.objectid, index,
5253                                               imode_to_type(mode), namebuf,
5254                                               name_len, tmp_err);
5255                         if (ret != tmp_err) {
5256                                 need_research = 1;
5257                                 goto begin;
5258                         }
5259                 }
5260                 btrfs_release_path(path);
5261                 print_dir_item_err(root, di_key, location.objectid, index,
5262                                    namebuf, name_len, filetype, tmp_err);
5263                 err |= tmp_err;
5264                 len = sizeof(*di) + name_len + data_len;
5265                 di = (struct btrfs_dir_item *)((char *)di + len);
5266                 cur += len;
5267
5268                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5269                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5270                               root->objectid, di_key->objectid,
5271                               di_key->offset);
5272                         break;
5273                 }
5274         }
5275 out:
5276         /* research path */
5277         btrfs_release_path(path);
5278         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5279         if (ret)
5280                 err |= ret > 0 ? -ENOENT : ret;
5281         return err;
5282 }
5283
5284 /*
5285  * Wrapper function of btrfs_punch_hole.
5286  *
5287  * Returns 0 means success.
5288  * Returns not 0 means error.
5289  */
5290 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5291                              u64 len)
5292 {
5293         struct btrfs_trans_handle *trans;
5294         int ret = 0;
5295
5296         trans = btrfs_start_transaction(root, 1);
5297         if (IS_ERR(trans))
5298                 return PTR_ERR(trans);
5299
5300         ret = btrfs_punch_hole(trans, root, ino, start, len);
5301         if (ret)
5302                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5303                       start, len, ino);
5304         else
5305                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5306                        ino);
5307
5308         btrfs_commit_transaction(trans, root);
5309         return ret;
5310 }
5311
5312 /*
5313  * Check file extent datasum/hole, update the size of the file extents,
5314  * check and update the last offset of the file extent.
5315  *
5316  * @root:       the root of fs/file tree.
5317  * @fkey:       the key of the file extent.
5318  * @nodatasum:  INODE_NODATASUM feature.
5319  * @size:       the sum of all EXTENT_DATA items size for this inode.
5320  * @end:        the offset of the last extent.
5321  *
5322  * Return 0 if no error occurred.
5323  */
5324 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5325                              struct extent_buffer *node, int slot,
5326                              unsigned int nodatasum, u64 *size, u64 *end)
5327 {
5328         struct btrfs_file_extent_item *fi;
5329         u64 disk_bytenr;
5330         u64 disk_num_bytes;
5331         u64 extent_num_bytes;
5332         u64 extent_offset;
5333         u64 csum_found;         /* In byte size, sectorsize aligned */
5334         u64 search_start;       /* Logical range start we search for csum */
5335         u64 search_len;         /* Logical range len we search for csum */
5336         unsigned int extent_type;
5337         unsigned int is_hole;
5338         int compressed = 0;
5339         int ret;
5340         int err = 0;
5341
5342         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5343
5344         /* Check inline extent */
5345         extent_type = btrfs_file_extent_type(node, fi);
5346         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5347                 struct btrfs_item *e = btrfs_item_nr(slot);
5348                 u32 item_inline_len;
5349
5350                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5351                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5352                 compressed = btrfs_file_extent_compression(node, fi);
5353                 if (extent_num_bytes == 0) {
5354                         error(
5355                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5356                                 root->objectid, fkey->objectid, fkey->offset);
5357                         err |= FILE_EXTENT_ERROR;
5358                 }
5359                 if (!compressed && extent_num_bytes != item_inline_len) {
5360                         error(
5361                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5362                                 root->objectid, fkey->objectid, fkey->offset,
5363                                 extent_num_bytes, item_inline_len);
5364                         err |= FILE_EXTENT_ERROR;
5365                 }
5366                 *end += extent_num_bytes;
5367                 *size += extent_num_bytes;
5368                 return err;
5369         }
5370
5371         /* Check extent type */
5372         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5373                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5374                 err |= FILE_EXTENT_ERROR;
5375                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5376                       root->objectid, fkey->objectid, fkey->offset);
5377                 return err;
5378         }
5379
5380         /* Check REG_EXTENT/PREALLOC_EXTENT */
5381         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5382         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5383         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5384         extent_offset = btrfs_file_extent_offset(node, fi);
5385         compressed = btrfs_file_extent_compression(node, fi);
5386         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5387
5388         /*
5389          * Check EXTENT_DATA csum
5390          *
5391          * For plain (uncompressed) extent, we should only check the range
5392          * we're referring to, as it's possible that part of prealloc extent
5393          * has been written, and has csum:
5394          *
5395          * |<--- Original large preallocated extent A ---->|
5396          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5397          *      No csum                         Has csum
5398          *
5399          * For compressed extent, we should check the whole range.
5400          */
5401         if (!compressed) {
5402                 search_start = disk_bytenr + extent_offset;
5403                 search_len = extent_num_bytes;
5404         } else {
5405                 search_start = disk_bytenr;
5406                 search_len = disk_num_bytes;
5407         }
5408         ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found);
5409         if (csum_found > 0 && nodatasum) {
5410                 err |= ODD_CSUM_ITEM;
5411                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5412                       root->objectid, fkey->objectid, fkey->offset);
5413         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5414                    !is_hole && (ret < 0 || csum_found < search_len)) {
5415                 err |= CSUM_ITEM_MISSING;
5416                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5417                       root->objectid, fkey->objectid, fkey->offset,
5418                       csum_found, search_len);
5419         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5420                 err |= ODD_CSUM_ITEM;
5421                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5422                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5423         }
5424
5425         /* Check EXTENT_DATA hole */
5426         if (!no_holes && *end != fkey->offset) {
5427                 if (repair)
5428                         ret = punch_extent_hole(root, fkey->objectid,
5429                                                 *end, fkey->offset - *end);
5430                 if (!repair || ret) {
5431                         err |= FILE_EXTENT_ERROR;
5432                         error(
5433 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5434                                 root->objectid, fkey->objectid, fkey->offset,
5435                                 fkey->objectid, *end);
5436                 }
5437         }
5438
5439         *end += extent_num_bytes;
5440         if (!is_hole)
5441                 *size += extent_num_bytes;
5442
5443         return err;
5444 }
5445
5446 /*
5447  * Set inode item nbytes to @nbytes
5448  *
5449  * Returns  0     on success
5450  * Returns  != 0  on error
5451  */
5452 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5453                                       struct btrfs_path *path,
5454                                       u64 ino, u64 nbytes)
5455 {
5456         struct btrfs_trans_handle *trans;
5457         struct btrfs_inode_item *ii;
5458         struct btrfs_key key;
5459         struct btrfs_key research_key;
5460         int err = 0;
5461         int ret;
5462
5463         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5464
5465         key.objectid = ino;
5466         key.type = BTRFS_INODE_ITEM_KEY;
5467         key.offset = 0;
5468
5469         trans = btrfs_start_transaction(root, 1);
5470         if (IS_ERR(trans)) {
5471                 ret = PTR_ERR(trans);
5472                 err |= ret;
5473                 goto out;
5474         }
5475
5476         btrfs_release_path(path);
5477         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5478         if (ret > 0)
5479                 ret = -ENOENT;
5480         if (ret) {
5481                 err |= ret;
5482                 goto fail;
5483         }
5484
5485         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5486                             struct btrfs_inode_item);
5487         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5488         btrfs_mark_buffer_dirty(path->nodes[0]);
5489 fail:
5490         btrfs_commit_transaction(trans, root);
5491 out:
5492         if (ret)
5493                 error("failed to set nbytes in inode %llu root %llu",
5494                       ino, root->root_key.objectid);
5495         else
5496                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5497                        root->root_key.objectid, nbytes);
5498
5499         /* research path */
5500         btrfs_release_path(path);
5501         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5502         err |= ret;
5503
5504         return err;
5505 }
5506
5507 /*
5508  * Set directory inode isize to @isize.
5509  *
5510  * Returns 0     on success.
5511  * Returns != 0  on error.
5512  */
5513 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5514                                    struct btrfs_path *path,
5515                                    u64 ino, u64 isize)
5516 {
5517         struct btrfs_trans_handle *trans;
5518         struct btrfs_inode_item *ii;
5519         struct btrfs_key key;
5520         struct btrfs_key research_key;
5521         int ret;
5522         int err = 0;
5523
5524         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5525
5526         key.objectid = ino;
5527         key.type = BTRFS_INODE_ITEM_KEY;
5528         key.offset = 0;
5529
5530         trans = btrfs_start_transaction(root, 1);
5531         if (IS_ERR(trans)) {
5532                 ret = PTR_ERR(trans);
5533                 err |= ret;
5534                 goto out;
5535         }
5536
5537         btrfs_release_path(path);
5538         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5539         if (ret > 0)
5540                 ret = -ENOENT;
5541         if (ret) {
5542                 err |= ret;
5543                 goto fail;
5544         }
5545
5546         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5547                             struct btrfs_inode_item);
5548         btrfs_set_inode_size(path->nodes[0], ii, isize);
5549         btrfs_mark_buffer_dirty(path->nodes[0]);
5550 fail:
5551         btrfs_commit_transaction(trans, root);
5552 out:
5553         if (ret)
5554                 error("failed to set isize in inode %llu root %llu",
5555                       ino, root->root_key.objectid);
5556         else
5557                 printf("Set isize in inode %llu root %llu to %llu\n",
5558                        ino, root->root_key.objectid, isize);
5559
5560         btrfs_release_path(path);
5561         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5562         err |= ret;
5563
5564         return err;
5565 }
5566
5567 /*
5568  * Wrapper function for btrfs_add_orphan_item().
5569  *
5570  * Returns 0     on success.
5571  * Returns != 0  on error.
5572  */
5573 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5574                                            struct btrfs_path *path, u64 ino)
5575 {
5576         struct btrfs_trans_handle *trans;
5577         struct btrfs_key research_key;
5578         int ret;
5579         int err = 0;
5580
5581         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5582
5583         trans = btrfs_start_transaction(root, 1);
5584         if (IS_ERR(trans)) {
5585                 ret = PTR_ERR(trans);
5586                 err |= ret;
5587                 goto out;
5588         }
5589
5590         btrfs_release_path(path);
5591         ret = btrfs_add_orphan_item(trans, root, path, ino);
5592         err |= ret;
5593         btrfs_commit_transaction(trans, root);
5594 out:
5595         if (ret)
5596                 error("failed to add inode %llu as orphan item root %llu",
5597                       ino, root->root_key.objectid);
5598         else
5599                 printf("Added inode %llu as orphan item root %llu\n",
5600                        ino, root->root_key.objectid);
5601
5602         btrfs_release_path(path);
5603         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5604         err |= ret;
5605
5606         return err;
5607 }
5608
5609 /* Set inode_item nlink to @ref_count.
5610  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5611  *
5612  * Returns 0 on success
5613  */
5614 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5615                                       struct btrfs_path *path, u64 ino,
5616                                       const char *name, u32 namelen,
5617                                       u64 ref_count, u8 filetype, u64 *nlink)
5618 {
5619         struct btrfs_trans_handle *trans;
5620         struct btrfs_inode_item *ii;
5621         struct btrfs_key key;
5622         struct btrfs_key old_key;
5623         char namebuf[BTRFS_NAME_LEN] = {0};
5624         int name_len;
5625         int ret;
5626         int ret2;
5627
5628         /* save the key */
5629         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5630
5631         if (name && namelen) {
5632                 ASSERT(namelen <= BTRFS_NAME_LEN);
5633                 memcpy(namebuf, name, namelen);
5634                 name_len = namelen;
5635         } else {
5636                 sprintf(namebuf, "%llu", ino);
5637                 name_len = count_digits(ino);
5638                 printf("Can't find file name for inode %llu, use %s instead\n",
5639                        ino, namebuf);
5640         }
5641
5642         trans = btrfs_start_transaction(root, 1);
5643         if (IS_ERR(trans)) {
5644                 ret = PTR_ERR(trans);
5645                 goto out;
5646         }
5647
5648         btrfs_release_path(path);
5649         /* if refs is 0, put it into lostfound */
5650         if (ref_count == 0) {
5651                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5652                                               name_len, filetype, &ref_count);
5653                 if (ret)
5654                         goto fail;
5655         }
5656
5657         /* reset inode_item's nlink to ref_count */
5658         key.objectid = ino;
5659         key.type = BTRFS_INODE_ITEM_KEY;
5660         key.offset = 0;
5661
5662         btrfs_release_path(path);
5663         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5664         if (ret > 0)
5665                 ret = -ENOENT;
5666         if (ret)
5667                 goto fail;
5668
5669         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5670                             struct btrfs_inode_item);
5671         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5672         btrfs_mark_buffer_dirty(path->nodes[0]);
5673
5674         if (nlink)
5675                 *nlink = ref_count;
5676 fail:
5677         btrfs_commit_transaction(trans, root);
5678 out:
5679         if (ret)
5680                 error(
5681         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5682                        root->objectid, ino, namebuf, filetype);
5683         else
5684                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5685                        root->objectid, ino, namebuf, filetype);
5686
5687         /* research */
5688         btrfs_release_path(path);
5689         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5690         if (ret2 < 0)
5691                 return ret |= ret2;
5692         return ret;
5693 }
5694
5695 /*
5696  * Check INODE_ITEM and related ITEMs (the same inode number)
5697  * 1. check link count
5698  * 2. check inode ref/extref
5699  * 3. check dir item/index
5700  *
5701  * @ext_ref:    the EXTENDED_IREF feature
5702  *
5703  * Return 0 if no error occurred.
5704  * Return >0 for error or hit the traversal is done(by error bitmap)
5705  */
5706 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5707                             unsigned int ext_ref)
5708 {
5709         struct extent_buffer *node;
5710         struct btrfs_inode_item *ii;
5711         struct btrfs_key key;
5712         struct btrfs_key last_key;
5713         u64 inode_id;
5714         u32 mode;
5715         u64 nlink;
5716         u64 nbytes;
5717         u64 isize;
5718         u64 size = 0;
5719         u64 refs = 0;
5720         u64 extent_end = 0;
5721         u64 extent_size = 0;
5722         unsigned int dir;
5723         unsigned int nodatasum;
5724         int slot;
5725         int ret;
5726         int err = 0;
5727         char namebuf[BTRFS_NAME_LEN] = {0};
5728         u32 name_len = 0;
5729
5730         node = path->nodes[0];
5731         slot = path->slots[0];
5732
5733         btrfs_item_key_to_cpu(node, &key, slot);
5734         inode_id = key.objectid;
5735
5736         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5737                 ret = btrfs_next_item(root, path);
5738                 if (ret > 0)
5739                         err |= LAST_ITEM;
5740                 return err;
5741         }
5742
5743         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5744         isize = btrfs_inode_size(node, ii);
5745         nbytes = btrfs_inode_nbytes(node, ii);
5746         mode = btrfs_inode_mode(node, ii);
5747         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5748         nlink = btrfs_inode_nlink(node, ii);
5749         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5750
5751         while (1) {
5752                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5753                 ret = btrfs_next_item(root, path);
5754                 if (ret < 0) {
5755                         /* out will fill 'err' rusing current statistics */
5756                         goto out;
5757                 } else if (ret > 0) {
5758                         err |= LAST_ITEM;
5759                         goto out;
5760                 }
5761
5762                 node = path->nodes[0];
5763                 slot = path->slots[0];
5764                 btrfs_item_key_to_cpu(node, &key, slot);
5765                 if (key.objectid != inode_id)
5766                         goto out;
5767
5768                 switch (key.type) {
5769                 case BTRFS_INODE_REF_KEY:
5770                         ret = check_inode_ref(root, &key, path, namebuf,
5771                                               &name_len, &refs, mode);
5772                         err |= ret;
5773                         break;
5774                 case BTRFS_INODE_EXTREF_KEY:
5775                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5776                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5777                                         root->objectid, key.objectid,
5778                                         key.offset);
5779                         ret = check_inode_extref(root, &key, node, slot, &refs,
5780                                                  mode);
5781                         err |= ret;
5782                         break;
5783                 case BTRFS_DIR_ITEM_KEY:
5784                 case BTRFS_DIR_INDEX_KEY:
5785                         if (!dir) {
5786                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5787                                         root->objectid, inode_id,
5788                                         imode_to_type(mode), key.objectid,
5789                                         key.offset);
5790                         }
5791                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5792                         err |= ret;
5793                         break;
5794                 case BTRFS_EXTENT_DATA_KEY:
5795                         if (dir) {
5796                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5797                                         root->objectid, inode_id, key.objectid,
5798                                         key.offset);
5799                         }
5800                         ret = check_file_extent(root, &key, node, slot,
5801                                                 nodatasum, &extent_size,
5802                                                 &extent_end);
5803                         err |= ret;
5804                         break;
5805                 case BTRFS_XATTR_ITEM_KEY:
5806                         break;
5807                 default:
5808                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5809                               key.objectid, key.type, key.offset);
5810                 }
5811         }
5812
5813 out:
5814         if (err & LAST_ITEM) {
5815                 btrfs_release_path(path);
5816                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5817                 if (ret)
5818                         return err;
5819         }
5820
5821         /* verify INODE_ITEM nlink/isize/nbytes */
5822         if (dir) {
5823                 if (repair && (err & DIR_COUNT_AGAIN)) {
5824                         err &= ~DIR_COUNT_AGAIN;
5825                         count_dir_isize(root, inode_id, &size);
5826                 }
5827
5828                 if ((nlink != 1 || refs != 1) && repair) {
5829                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
5830                                 namebuf, name_len, refs, imode_to_type(mode),
5831                                 &nlink);
5832                 }
5833
5834                 if (nlink != 1) {
5835                         err |= LINK_COUNT_ERROR;
5836                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
5837                               root->objectid, inode_id, nlink);
5838                 }
5839
5840                 /*
5841                  * Just a warning, as dir inode nbytes is just an
5842                  * instructive value.
5843                  */
5844                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
5845                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
5846                                 root->objectid, inode_id,
5847                                 root->fs_info->nodesize);
5848                 }
5849
5850                 if (isize != size) {
5851                         if (repair)
5852                                 ret = repair_dir_isize_lowmem(root, path,
5853                                                               inode_id, size);
5854                         if (!repair || ret) {
5855                                 err |= ISIZE_ERROR;
5856                                 error(
5857                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
5858                                       root->objectid, inode_id, isize, size);
5859                         }
5860                 }
5861         } else {
5862                 if (nlink != refs) {
5863                         if (repair)
5864                                 ret = repair_inode_nlinks_lowmem(root, path,
5865                                          inode_id, namebuf, name_len, refs,
5866                                          imode_to_type(mode), &nlink);
5867                         if (!repair || ret) {
5868                                 err |= LINK_COUNT_ERROR;
5869                                 error(
5870                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
5871                                       root->objectid, inode_id, nlink, refs);
5872                         }
5873                 } else if (!nlink) {
5874                         if (repair)
5875                                 ret = repair_inode_orphan_item_lowmem(root,
5876                                                               path, inode_id);
5877                         if (!repair || ret) {
5878                                 err |= ORPHAN_ITEM;
5879                                 error("root %llu INODE[%llu] is orphan item",
5880                                       root->objectid, inode_id);
5881                         }
5882                 }
5883
5884                 if (!nbytes && !no_holes && extent_end < isize) {
5885                         if (repair)
5886                                 ret = punch_extent_hole(root, inode_id,
5887                                                 extent_end, isize - extent_end);
5888                         if (!repair || ret) {
5889                                 err |= NBYTES_ERROR;
5890                                 error(
5891         "root %llu INODE[%llu] size %llu should have a file extent hole",
5892                                       root->objectid, inode_id, isize);
5893                         }
5894                 }
5895
5896                 if (nbytes != extent_size) {
5897                         if (repair)
5898                                 ret = repair_inode_nbytes_lowmem(root, path,
5899                                                          inode_id, extent_size);
5900                         if (!repair || ret) {
5901                                 err |= NBYTES_ERROR;
5902                                 error(
5903         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
5904                                       root->objectid, inode_id, nbytes,
5905                                       extent_size);
5906                         }
5907                 }
5908         }
5909
5910         if (err & LAST_ITEM)
5911                 btrfs_next_item(root, path);
5912         return err;
5913 }
5914
5915 /*
5916  * Insert the missing inode item and inode ref.
5917  *
5918  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
5919  * Root dir should be handled specially because root dir is the root of fs.
5920  *
5921  * returns err (>0 or 0) after repair
5922  */
5923 static int repair_fs_first_inode(struct btrfs_root *root, int err)
5924 {
5925         struct btrfs_trans_handle *trans;
5926         struct btrfs_key key;
5927         struct btrfs_path path;
5928         int filetype = BTRFS_FT_DIR;
5929         int ret = 0;
5930
5931         btrfs_init_path(&path);
5932
5933         if (err & INODE_REF_MISSING) {
5934                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5935                 key.type = BTRFS_INODE_REF_KEY;
5936                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
5937
5938                 trans = btrfs_start_transaction(root, 1);
5939                 if (IS_ERR(trans)) {
5940                         ret = PTR_ERR(trans);
5941                         goto out;
5942                 }
5943
5944                 btrfs_release_path(&path);
5945                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
5946                 if (ret)
5947                         goto trans_fail;
5948
5949                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
5950                                              BTRFS_FIRST_FREE_OBJECTID,
5951                                              BTRFS_FIRST_FREE_OBJECTID, 0);
5952                 if (ret)
5953                         goto trans_fail;
5954
5955                 printf("Add INODE_REF[%llu %llu] name %s\n",
5956                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
5957                        "..");
5958                 err &= ~INODE_REF_MISSING;
5959 trans_fail:
5960                 if (ret)
5961                         error("fail to insert first inode's ref");
5962                 btrfs_commit_transaction(trans, root);
5963         }
5964
5965         if (err & INODE_ITEM_MISSING) {
5966                 ret = repair_inode_item_missing(root,
5967                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
5968                 if (ret)
5969                         goto out;
5970                 err &= ~INODE_ITEM_MISSING;
5971         }
5972 out:
5973         if (ret)
5974                 error("fail to repair first inode");
5975         btrfs_release_path(&path);
5976         return err;
5977 }
5978
5979 /*
5980  * check first root dir's inode_item and inode_ref
5981  *
5982  * returns 0 means no error
5983  * returns >0 means error
5984  * returns <0 means fatal error
5985  */
5986 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
5987 {
5988         struct btrfs_path path;
5989         struct btrfs_key key;
5990         struct btrfs_inode_item *ii;
5991         u64 index;
5992         u32 mode;
5993         int err = 0;
5994         int ret;
5995
5996         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5997         key.type = BTRFS_INODE_ITEM_KEY;
5998         key.offset = 0;
5999
6000         /* For root being dropped, we don't need to check first inode */
6001         if (btrfs_root_refs(&root->root_item) == 0 &&
6002             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6003             BTRFS_FIRST_FREE_OBJECTID)
6004                 return 0;
6005
6006         btrfs_init_path(&path);
6007         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6008         if (ret < 0)
6009                 goto out;
6010         if (ret > 0) {
6011                 ret = 0;
6012                 err |= INODE_ITEM_MISSING;
6013         } else {
6014                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6015                                     struct btrfs_inode_item);
6016                 mode = btrfs_inode_mode(path.nodes[0], ii);
6017                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6018                         err |= INODE_ITEM_MISMATCH;
6019         }
6020
6021         /* lookup first inode ref */
6022         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6023         key.type = BTRFS_INODE_REF_KEY;
6024         /* special index value */
6025         index = 0;
6026
6027         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6028         if (ret < 0)
6029                 goto out;
6030         err |= ret;
6031
6032 out:
6033         btrfs_release_path(&path);
6034
6035         if (err && repair)
6036                 err = repair_fs_first_inode(root, err);
6037
6038         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6039                 error("root dir INODE_ITEM is %s",
6040                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6041         if (err & INODE_REF_MISSING)
6042                 error("root dir INODE_REF is missing");
6043
6044         return ret < 0 ? ret : err;
6045 }
6046
6047 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6048                                                 u64 parent, u64 root)
6049 {
6050         struct rb_node *node;
6051         struct tree_backref *back = NULL;
6052         struct tree_backref match = {
6053                 .node = {
6054                         .is_data = 0,
6055                 },
6056         };
6057
6058         if (parent) {
6059                 match.parent = parent;
6060                 match.node.full_backref = 1;
6061         } else {
6062                 match.root = root;
6063         }
6064
6065         node = rb_search(&rec->backref_tree, &match.node.node,
6066                          (rb_compare_keys)compare_extent_backref, NULL);
6067         if (node)
6068                 back = to_tree_backref(rb_node_to_extent_backref(node));
6069
6070         return back;
6071 }
6072
6073 static struct data_backref *find_data_backref(struct extent_record *rec,
6074                                                 u64 parent, u64 root,
6075                                                 u64 owner, u64 offset,
6076                                                 int found_ref,
6077                                                 u64 disk_bytenr, u64 bytes)
6078 {
6079         struct rb_node *node;
6080         struct data_backref *back = NULL;
6081         struct data_backref match = {
6082                 .node = {
6083                         .is_data = 1,
6084                 },
6085                 .owner = owner,
6086                 .offset = offset,
6087                 .bytes = bytes,
6088                 .found_ref = found_ref,
6089                 .disk_bytenr = disk_bytenr,
6090         };
6091
6092         if (parent) {
6093                 match.parent = parent;
6094                 match.node.full_backref = 1;
6095         } else {
6096                 match.root = root;
6097         }
6098
6099         node = rb_search(&rec->backref_tree, &match.node.node,
6100                          (rb_compare_keys)compare_extent_backref, NULL);
6101         if (node)
6102                 back = to_data_backref(rb_node_to_extent_backref(node));
6103
6104         return back;
6105 }
6106 /*
6107  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6108  * blocks and integrity of fs tree items.
6109  *
6110  * @root:         the root of the tree to be checked.
6111  * @ext_ref       feature EXTENDED_IREF is enable or not.
6112  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6113  *                otherwise means check fs tree(s) items relationship and
6114  *                @root MUST be a fs tree root.
6115  * Returns 0      represents OK.
6116  * Returns not 0  represents error.
6117  */
6118 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6119                             struct btrfs_root *root, unsigned int ext_ref,
6120                             int check_all)
6121
6122 {
6123         struct btrfs_path path;
6124         struct node_refs nrefs;
6125         struct btrfs_root_item *root_item = &root->root_item;
6126         int ret;
6127         int level;
6128         int err = 0;
6129
6130         memset(&nrefs, 0, sizeof(nrefs));
6131         if (!check_all) {
6132                 /*
6133                  * We need to manually check the first inode item (256)
6134                  * As the following traversal function will only start from
6135                  * the first inode item in the leaf, if inode item (256) is
6136                  * missing we will skip it forever.
6137                  */
6138                 ret = check_fs_first_inode(root, ext_ref);
6139                 if (ret < 0)
6140                         return ret;
6141         }
6142
6143
6144         level = btrfs_header_level(root->node);
6145         btrfs_init_path(&path);
6146
6147         if (btrfs_root_refs(root_item) > 0 ||
6148             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6149                 path.nodes[level] = root->node;
6150                 path.slots[level] = 0;
6151                 extent_buffer_get(root->node);
6152         } else {
6153                 struct btrfs_key key;
6154
6155                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6156                 level = root_item->drop_level;
6157                 path.lowest_level = level;
6158                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6159                 if (ret < 0)
6160                         goto out;
6161                 ret = 0;
6162         }
6163
6164         while (1) {
6165                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6166                                         ext_ref, check_all);
6167
6168                 err |= !!ret;
6169
6170                 /* if ret is negative, walk shall stop */
6171                 if (ret < 0) {
6172                         ret = err;
6173                         break;
6174                 }
6175
6176                 ret = walk_up_tree_v2(root, &path, &level);
6177                 if (ret != 0) {
6178                         /* Normal exit, reset ret to err */
6179                         ret = err;
6180                         break;
6181                 }
6182         }
6183
6184 out:
6185         btrfs_release_path(&path);
6186         return ret;
6187 }
6188
6189 /*
6190  * Iterate all items in the tree and call check_inode_item() to check.
6191  *
6192  * @root:       the root of the tree to be checked.
6193  * @ext_ref:    the EXTENDED_IREF feature
6194  *
6195  * Return 0 if no error found.
6196  * Return <0 for error.
6197  */
6198 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6199 {
6200         reset_cached_block_groups(root->fs_info);
6201         return check_btrfs_root(NULL, root, ext_ref, 0);
6202 }
6203
6204 /*
6205  * Find the relative ref for root_ref and root_backref.
6206  *
6207  * @root:       the root of the root tree.
6208  * @ref_key:    the key of the root ref.
6209  *
6210  * Return 0 if no error occurred.
6211  */
6212 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6213                           struct extent_buffer *node, int slot)
6214 {
6215         struct btrfs_path path;
6216         struct btrfs_key key;
6217         struct btrfs_root_ref *ref;
6218         struct btrfs_root_ref *backref;
6219         char ref_name[BTRFS_NAME_LEN] = {0};
6220         char backref_name[BTRFS_NAME_LEN] = {0};
6221         u64 ref_dirid;
6222         u64 ref_seq;
6223         u32 ref_namelen;
6224         u64 backref_dirid;
6225         u64 backref_seq;
6226         u32 backref_namelen;
6227         u32 len;
6228         int ret;
6229         int err = 0;
6230
6231         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6232         ref_dirid = btrfs_root_ref_dirid(node, ref);
6233         ref_seq = btrfs_root_ref_sequence(node, ref);
6234         ref_namelen = btrfs_root_ref_name_len(node, ref);
6235
6236         if (ref_namelen <= BTRFS_NAME_LEN) {
6237                 len = ref_namelen;
6238         } else {
6239                 len = BTRFS_NAME_LEN;
6240                 warning("%s[%llu %llu] ref_name too long",
6241                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6242                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6243                         ref_key->offset);
6244         }
6245         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6246
6247         /* Find relative root_ref */
6248         key.objectid = ref_key->offset;
6249         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6250         key.offset = ref_key->objectid;
6251
6252         btrfs_init_path(&path);
6253         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6254         if (ret) {
6255                 err |= ROOT_REF_MISSING;
6256                 error("%s[%llu %llu] couldn't find relative ref",
6257                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6258                       "ROOT_REF" : "ROOT_BACKREF",
6259                       ref_key->objectid, ref_key->offset);
6260                 goto out;
6261         }
6262
6263         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6264                                  struct btrfs_root_ref);
6265         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6266         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6267         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6268
6269         if (backref_namelen <= BTRFS_NAME_LEN) {
6270                 len = backref_namelen;
6271         } else {
6272                 len = BTRFS_NAME_LEN;
6273                 warning("%s[%llu %llu] ref_name too long",
6274                         key.type == BTRFS_ROOT_REF_KEY ?
6275                         "ROOT_REF" : "ROOT_BACKREF",
6276                         key.objectid, key.offset);
6277         }
6278         read_extent_buffer(path.nodes[0], backref_name,
6279                            (unsigned long)(backref + 1), len);
6280
6281         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6282             ref_namelen != backref_namelen ||
6283             strncmp(ref_name, backref_name, len)) {
6284                 err |= ROOT_REF_MISMATCH;
6285                 error("%s[%llu %llu] mismatch relative ref",
6286                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6287                       "ROOT_REF" : "ROOT_BACKREF",
6288                       ref_key->objectid, ref_key->offset);
6289         }
6290 out:
6291         btrfs_release_path(&path);
6292         return err;
6293 }
6294
6295 /*
6296  * Check all fs/file tree in low_memory mode.
6297  *
6298  * 1. for fs tree root item, call check_fs_root_v2()
6299  * 2. for fs tree root ref/backref, call check_root_ref()
6300  *
6301  * Return 0 if no error occurred.
6302  */
6303 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6304 {
6305         struct btrfs_root *tree_root = fs_info->tree_root;
6306         struct btrfs_root *cur_root = NULL;
6307         struct btrfs_path path;
6308         struct btrfs_key key;
6309         struct extent_buffer *node;
6310         unsigned int ext_ref;
6311         int slot;
6312         int ret;
6313         int err = 0;
6314
6315         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6316
6317         btrfs_init_path(&path);
6318         key.objectid = BTRFS_FS_TREE_OBJECTID;
6319         key.offset = 0;
6320         key.type = BTRFS_ROOT_ITEM_KEY;
6321
6322         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6323         if (ret < 0) {
6324                 err = ret;
6325                 goto out;
6326         } else if (ret > 0) {
6327                 err = -ENOENT;
6328                 goto out;
6329         }
6330
6331         while (1) {
6332                 node = path.nodes[0];
6333                 slot = path.slots[0];
6334                 btrfs_item_key_to_cpu(node, &key, slot);
6335                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6336                         goto out;
6337                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6338                     fs_root_objectid(key.objectid)) {
6339                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6340                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6341                                                                        &key);
6342                         } else {
6343                                 key.offset = (u64)-1;
6344                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6345                         }
6346
6347                         if (IS_ERR(cur_root)) {
6348                                 error("Fail to read fs/subvol tree: %lld",
6349                                       key.objectid);
6350                                 err = -EIO;
6351                                 goto next;
6352                         }
6353
6354                         ret = check_fs_root_v2(cur_root, ext_ref);
6355                         err |= ret;
6356
6357                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6358                                 btrfs_free_fs_root(cur_root);
6359                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6360                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6361                         ret = check_root_ref(tree_root, &key, node, slot);
6362                         err |= ret;
6363                 }
6364 next:
6365                 ret = btrfs_next_item(tree_root, &path);
6366                 if (ret > 0)
6367                         goto out;
6368                 if (ret < 0) {
6369                         err = ret;
6370                         goto out;
6371                 }
6372         }
6373
6374 out:
6375         btrfs_release_path(&path);
6376         return err;
6377 }
6378
6379 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6380                           struct cache_tree *root_cache)
6381 {
6382         int ret;
6383
6384         if (!ctx.progress_enabled)
6385                 fprintf(stderr, "checking fs roots\n");
6386         if (check_mode == CHECK_MODE_LOWMEM)
6387                 ret = check_fs_roots_v2(fs_info);
6388         else
6389                 ret = check_fs_roots(fs_info, root_cache);
6390
6391         return ret;
6392 }
6393
6394 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6395 {
6396         struct extent_backref *back, *tmp;
6397         struct tree_backref *tback;
6398         struct data_backref *dback;
6399         u64 found = 0;
6400         int err = 0;
6401
6402         rbtree_postorder_for_each_entry_safe(back, tmp,
6403                                              &rec->backref_tree, node) {
6404                 if (!back->found_extent_tree) {
6405                         err = 1;
6406                         if (!print_errs)
6407                                 goto out;
6408                         if (back->is_data) {
6409                                 dback = to_data_backref(back);
6410                                 fprintf(stderr, "Data backref %llu %s %llu"
6411                                         " owner %llu offset %llu num_refs %lu"
6412                                         " not found in extent tree\n",
6413                                         (unsigned long long)rec->start,
6414                                         back->full_backref ?
6415                                         "parent" : "root",
6416                                         back->full_backref ?
6417                                         (unsigned long long)dback->parent:
6418                                         (unsigned long long)dback->root,
6419                                         (unsigned long long)dback->owner,
6420                                         (unsigned long long)dback->offset,
6421                                         (unsigned long)dback->num_refs);
6422                         } else {
6423                                 tback = to_tree_backref(back);
6424                                 fprintf(stderr, "Tree backref %llu parent %llu"
6425                                         " root %llu not found in extent tree\n",
6426                                         (unsigned long long)rec->start,
6427                                         (unsigned long long)tback->parent,
6428                                         (unsigned long long)tback->root);
6429                         }
6430                 }
6431                 if (!back->is_data && !back->found_ref) {
6432                         err = 1;
6433                         if (!print_errs)
6434                                 goto out;
6435                         tback = to_tree_backref(back);
6436                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6437                                 (unsigned long long)rec->start,
6438                                 back->full_backref ? "parent" : "root",
6439                                 back->full_backref ?
6440                                 (unsigned long long)tback->parent :
6441                                 (unsigned long long)tback->root, back);
6442                 }
6443                 if (back->is_data) {
6444                         dback = to_data_backref(back);
6445                         if (dback->found_ref != dback->num_refs) {
6446                                 err = 1;
6447                                 if (!print_errs)
6448                                         goto out;
6449                                 fprintf(stderr, "Incorrect local backref count"
6450                                         " on %llu %s %llu owner %llu"
6451                                         " offset %llu found %u wanted %u back %p\n",
6452                                         (unsigned long long)rec->start,
6453                                         back->full_backref ?
6454                                         "parent" : "root",
6455                                         back->full_backref ?
6456                                         (unsigned long long)dback->parent:
6457                                         (unsigned long long)dback->root,
6458                                         (unsigned long long)dback->owner,
6459                                         (unsigned long long)dback->offset,
6460                                         dback->found_ref, dback->num_refs, back);
6461                         }
6462                         if (dback->disk_bytenr != rec->start) {
6463                                 err = 1;
6464                                 if (!print_errs)
6465                                         goto out;
6466                                 fprintf(stderr, "Backref disk bytenr does not"
6467                                         " match extent record, bytenr=%llu, "
6468                                         "ref bytenr=%llu\n",
6469                                         (unsigned long long)rec->start,
6470                                         (unsigned long long)dback->disk_bytenr);
6471                         }
6472
6473                         if (dback->bytes != rec->nr) {
6474                                 err = 1;
6475                                 if (!print_errs)
6476                                         goto out;
6477                                 fprintf(stderr, "Backref bytes do not match "
6478                                         "extent backref, bytenr=%llu, ref "
6479                                         "bytes=%llu, backref bytes=%llu\n",
6480                                         (unsigned long long)rec->start,
6481                                         (unsigned long long)rec->nr,
6482                                         (unsigned long long)dback->bytes);
6483                         }
6484                 }
6485                 if (!back->is_data) {
6486                         found += 1;
6487                 } else {
6488                         dback = to_data_backref(back);
6489                         found += dback->found_ref;
6490                 }
6491         }
6492         if (found != rec->refs) {
6493                 err = 1;
6494                 if (!print_errs)
6495                         goto out;
6496                 fprintf(stderr, "Incorrect global backref count "
6497                         "on %llu found %llu wanted %llu\n",
6498                         (unsigned long long)rec->start,
6499                         (unsigned long long)found,
6500                         (unsigned long long)rec->refs);
6501         }
6502 out:
6503         return err;
6504 }
6505
/*
 * rb_free_nodes() callback: free the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6512
6513 static void free_all_extent_backrefs(struct extent_record *rec)
6514 {
6515         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6516 }
6517
6518 static void free_extent_record_cache(struct cache_tree *extent_cache)
6519 {
6520         struct cache_extent *cache;
6521         struct extent_record *rec;
6522
6523         while (1) {
6524                 cache = first_cache_extent(extent_cache);
6525                 if (!cache)
6526                         break;
6527                 rec = container_of(cache, struct extent_record, cache);
6528                 remove_cache_extent(extent_cache, cache);
6529                 free_all_extent_backrefs(rec);
6530                 free(rec);
6531         }
6532 }
6533
6534 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6535                                  struct extent_record *rec)
6536 {
6537         if (rec->content_checked && rec->owner_ref_checked &&
6538             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6539             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6540             !rec->bad_full_backref && !rec->crossing_stripes &&
6541             !rec->wrong_chunk_type) {
6542                 remove_cache_extent(extent_cache, &rec->cache);
6543                 free_all_extent_backrefs(rec);
6544                 list_del_init(&rec->list);
6545                 free(rec);
6546         }
6547         return 0;
6548 }
6549
6550 static int check_owner_ref(struct btrfs_root *root,
6551                             struct extent_record *rec,
6552                             struct extent_buffer *buf)
6553 {
6554         struct extent_backref *node, *tmp;
6555         struct tree_backref *back;
6556         struct btrfs_root *ref_root;
6557         struct btrfs_key key;
6558         struct btrfs_path path;
6559         struct extent_buffer *parent;
6560         int level;
6561         int found = 0;
6562         int ret;
6563
6564         rbtree_postorder_for_each_entry_safe(node, tmp,
6565                                              &rec->backref_tree, node) {
6566                 if (node->is_data)
6567                         continue;
6568                 if (!node->found_ref)
6569                         continue;
6570                 if (node->full_backref)
6571                         continue;
6572                 back = to_tree_backref(node);
6573                 if (btrfs_header_owner(buf) == back->root)
6574                         return 0;
6575         }
6576         BUG_ON(rec->is_root);
6577
6578         /* try to find the block by search corresponding fs tree */
6579         key.objectid = btrfs_header_owner(buf);
6580         key.type = BTRFS_ROOT_ITEM_KEY;
6581         key.offset = (u64)-1;
6582
6583         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6584         if (IS_ERR(ref_root))
6585                 return 1;
6586
6587         level = btrfs_header_level(buf);
6588         if (level == 0)
6589                 btrfs_item_key_to_cpu(buf, &key, 0);
6590         else
6591                 btrfs_node_key_to_cpu(buf, &key, 0);
6592
6593         btrfs_init_path(&path);
6594         path.lowest_level = level + 1;
6595         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6596         if (ret < 0)
6597                 return 0;
6598
6599         parent = path.nodes[level + 1];
6600         if (parent && buf->start == btrfs_node_blockptr(parent,
6601                                                         path.slots[level + 1]))
6602                 found = 1;
6603
6604         btrfs_release_path(&path);
6605         return found ? 0 : 1;
6606 }
6607
6608 static int is_extent_tree_record(struct extent_record *rec)
6609 {
6610         struct extent_backref *node, *tmp;
6611         struct tree_backref *back;
6612         int is_extent = 0;
6613
6614         rbtree_postorder_for_each_entry_safe(node, tmp,
6615                                              &rec->backref_tree, node) {
6616                 if (node->is_data)
6617                         return 0;
6618                 back = to_tree_backref(node);
6619                 if (node->full_backref)
6620                         return 0;
6621                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6622                         is_extent = 1;
6623         }
6624         return is_extent;
6625 }
6626
6627
6628 static int record_bad_block_io(struct btrfs_fs_info *info,
6629                                struct cache_tree *extent_cache,
6630                                u64 start, u64 len)
6631 {
6632         struct extent_record *rec;
6633         struct cache_extent *cache;
6634         struct btrfs_key key;
6635
6636         cache = lookup_cache_extent(extent_cache, start, len);
6637         if (!cache)
6638                 return 0;
6639
6640         rec = container_of(cache, struct extent_record, cache);
6641         if (!is_extent_tree_record(rec))
6642                 return 0;
6643
6644         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6645         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6646 }
6647
6648 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6649                        struct extent_buffer *buf, int slot)
6650 {
6651         if (btrfs_header_level(buf)) {
6652                 struct btrfs_key_ptr ptr1, ptr2;
6653
6654                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6655                                    sizeof(struct btrfs_key_ptr));
6656                 read_extent_buffer(buf, &ptr2,
6657                                    btrfs_node_key_ptr_offset(slot + 1),
6658                                    sizeof(struct btrfs_key_ptr));
6659                 write_extent_buffer(buf, &ptr1,
6660                                     btrfs_node_key_ptr_offset(slot + 1),
6661                                     sizeof(struct btrfs_key_ptr));
6662                 write_extent_buffer(buf, &ptr2,
6663                                     btrfs_node_key_ptr_offset(slot),
6664                                     sizeof(struct btrfs_key_ptr));
6665                 if (slot == 0) {
6666                         struct btrfs_disk_key key;
6667                         btrfs_node_key(buf, &key, 0);
6668                         btrfs_fixup_low_keys(root, path, &key,
6669                                              btrfs_header_level(buf) + 1);
6670                 }
6671         } else {
6672                 struct btrfs_item *item1, *item2;
6673                 struct btrfs_key k1, k2;
6674                 char *item1_data, *item2_data;
6675                 u32 item1_offset, item2_offset, item1_size, item2_size;
6676
6677                 item1 = btrfs_item_nr(slot);
6678                 item2 = btrfs_item_nr(slot + 1);
6679                 btrfs_item_key_to_cpu(buf, &k1, slot);
6680                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6681                 item1_offset = btrfs_item_offset(buf, item1);
6682                 item2_offset = btrfs_item_offset(buf, item2);
6683                 item1_size = btrfs_item_size(buf, item1);
6684                 item2_size = btrfs_item_size(buf, item2);
6685
6686                 item1_data = malloc(item1_size);
6687                 if (!item1_data)
6688                         return -ENOMEM;
6689                 item2_data = malloc(item2_size);
6690                 if (!item2_data) {
6691                         free(item1_data);
6692                         return -ENOMEM;
6693                 }
6694
6695                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6696                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6697
6698                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6699                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6700                 free(item1_data);
6701                 free(item2_data);
6702
6703                 btrfs_set_item_offset(buf, item1, item2_offset);
6704                 btrfs_set_item_offset(buf, item2, item1_offset);
6705                 btrfs_set_item_size(buf, item1, item2_size);
6706                 btrfs_set_item_size(buf, item2, item1_size);
6707
6708                 path->slots[0] = slot;
6709                 btrfs_set_item_key_unsafe(root, path, &k2);
6710                 path->slots[0] = slot + 1;
6711                 btrfs_set_item_key_unsafe(root, path, &k1);
6712         }
6713         return 0;
6714 }
6715
6716 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6717 {
6718         struct extent_buffer *buf;
6719         struct btrfs_key k1, k2;
6720         int i;
6721         int level = path->lowest_level;
6722         int ret = -EIO;
6723
6724         buf = path->nodes[level];
6725         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6726                 if (level) {
6727                         btrfs_node_key_to_cpu(buf, &k1, i);
6728                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6729                 } else {
6730                         btrfs_item_key_to_cpu(buf, &k1, i);
6731                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6732                 }
6733                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6734                         continue;
6735                 ret = swap_values(root, path, buf, i);
6736                 if (ret)
6737                         break;
6738                 btrfs_mark_buffer_dirty(buf);
6739                 i = 0;
6740         }
6741         return ret;
6742 }
6743
6744 static int delete_bogus_item(struct btrfs_root *root,
6745                              struct btrfs_path *path,
6746                              struct extent_buffer *buf, int slot)
6747 {
6748         struct btrfs_key key;
6749         int nritems = btrfs_header_nritems(buf);
6750
6751         btrfs_item_key_to_cpu(buf, &key, slot);
6752
6753         /* These are all the keys we can deal with missing. */
6754         if (key.type != BTRFS_DIR_INDEX_KEY &&
6755             key.type != BTRFS_EXTENT_ITEM_KEY &&
6756             key.type != BTRFS_METADATA_ITEM_KEY &&
6757             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6758             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6759                 return -1;
6760
6761         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6762                (unsigned long long)key.objectid, key.type,
6763                (unsigned long long)key.offset, slot, buf->start);
6764         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6765                               btrfs_item_nr_offset(slot + 1),
6766                               sizeof(struct btrfs_item) *
6767                               (nritems - slot - 1));
6768         btrfs_set_header_nritems(buf, nritems - 1);
6769         if (slot == 0) {
6770                 struct btrfs_disk_key disk_key;
6771
6772                 btrfs_item_key(buf, &disk_key, 0);
6773                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6774         }
6775         btrfs_mark_buffer_dirty(buf);
6776         return 0;
6777 }
6778
6779 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6780 {
6781         struct extent_buffer *buf;
6782         int i;
6783         int ret = 0;
6784
6785         /* We should only get this for leaves */
6786         BUG_ON(path->lowest_level);
6787         buf = path->nodes[0];
6788 again:
6789         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6790                 unsigned int shift = 0, offset;
6791
6792                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6793                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6794                         if (btrfs_item_end_nr(buf, i) >
6795                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6796                                 ret = delete_bogus_item(root, path, buf, i);
6797                                 if (!ret)
6798                                         goto again;
6799                                 fprintf(stderr, "item is off the end of the "
6800                                         "leaf, can't fix\n");
6801                                 ret = -EIO;
6802                                 break;
6803                         }
6804                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6805                                 btrfs_item_end_nr(buf, i);
6806                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6807                            btrfs_item_offset_nr(buf, i - 1)) {
6808                         if (btrfs_item_end_nr(buf, i) >
6809                             btrfs_item_offset_nr(buf, i - 1)) {
6810                                 ret = delete_bogus_item(root, path, buf, i);
6811                                 if (!ret)
6812                                         goto again;
6813                                 fprintf(stderr, "items overlap, can't fix\n");
6814                                 ret = -EIO;
6815                                 break;
6816                         }
6817                         shift = btrfs_item_offset_nr(buf, i - 1) -
6818                                 btrfs_item_end_nr(buf, i);
6819                 }
6820                 if (!shift)
6821                         continue;
6822
6823                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6824                        i, shift, (unsigned long long)buf->start);
6825                 offset = btrfs_item_offset_nr(buf, i);
6826                 memmove_extent_buffer(buf,
6827                                       btrfs_leaf_data(buf) + offset + shift,
6828                                       btrfs_leaf_data(buf) + offset,
6829                                       btrfs_item_size_nr(buf, i));
6830                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6831                                       offset + shift);
6832                 btrfs_mark_buffer_dirty(buf);
6833         }
6834
6835         /*
6836          * We may have moved things, in which case we want to exit so we don't
6837          * write those changes out.  Once we have proper abort functionality in
6838          * progs this can be changed to something nicer.
6839          */
6840         BUG_ON(ret);
6841         return ret;
6842 }
6843
6844 /*
6845  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6846  * then just return -EIO.
6847  */
6848 static int try_to_fix_bad_block(struct btrfs_root *root,
6849                                 struct extent_buffer *buf,
6850                                 enum btrfs_tree_block_status status)
6851 {
6852         struct btrfs_trans_handle *trans;
6853         struct ulist *roots;
6854         struct ulist_node *node;
6855         struct btrfs_root *search_root;
6856         struct btrfs_path path;
6857         struct ulist_iterator iter;
6858         struct btrfs_key root_key, key;
6859         int ret;
6860
6861         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6862             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6863                 return -EIO;
6864
6865         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6866         if (ret)
6867                 return -EIO;
6868
6869         btrfs_init_path(&path);
6870         ULIST_ITER_INIT(&iter);
6871         while ((node = ulist_next(roots, &iter))) {
6872                 root_key.objectid = node->val;
6873                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6874                 root_key.offset = (u64)-1;
6875
6876                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6877                 if (IS_ERR(root)) {
6878                         ret = -EIO;
6879                         break;
6880                 }
6881
6882
6883                 trans = btrfs_start_transaction(search_root, 0);
6884                 if (IS_ERR(trans)) {
6885                         ret = PTR_ERR(trans);
6886                         break;
6887                 }
6888
6889                 path.lowest_level = btrfs_header_level(buf);
6890                 path.skip_check_block = 1;
6891                 if (path.lowest_level)
6892                         btrfs_node_key_to_cpu(buf, &key, 0);
6893                 else
6894                         btrfs_item_key_to_cpu(buf, &key, 0);
6895                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
6896                 if (ret) {
6897                         ret = -EIO;
6898                         btrfs_commit_transaction(trans, search_root);
6899                         break;
6900                 }
6901                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
6902                         ret = fix_key_order(search_root, &path);
6903                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6904                         ret = fix_item_offset(search_root, &path);
6905                 if (ret) {
6906                         btrfs_commit_transaction(trans, search_root);
6907                         break;
6908                 }
6909                 btrfs_release_path(&path);
6910                 btrfs_commit_transaction(trans, search_root);
6911         }
6912         ulist_free(roots);
6913         btrfs_release_path(&path);
6914         return ret;
6915 }
6916
6917 static int check_block(struct btrfs_root *root,
6918                        struct cache_tree *extent_cache,
6919                        struct extent_buffer *buf, u64 flags)
6920 {
6921         struct extent_record *rec;
6922         struct cache_extent *cache;
6923         struct btrfs_key key;
6924         enum btrfs_tree_block_status status;
6925         int ret = 0;
6926         int level;
6927
6928         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
6929         if (!cache)
6930                 return 1;
6931         rec = container_of(cache, struct extent_record, cache);
6932         rec->generation = btrfs_header_generation(buf);
6933
6934         level = btrfs_header_level(buf);
6935         if (btrfs_header_nritems(buf) > 0) {
6936
6937                 if (level == 0)
6938                         btrfs_item_key_to_cpu(buf, &key, 0);
6939                 else
6940                         btrfs_node_key_to_cpu(buf, &key, 0);
6941
6942                 rec->info_objectid = key.objectid;
6943         }
6944         rec->info_level = level;
6945
6946         if (btrfs_is_leaf(buf))
6947                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
6948         else
6949                 status = btrfs_check_node(root, &rec->parent_key, buf);
6950
6951         if (status != BTRFS_TREE_BLOCK_CLEAN) {
6952                 if (repair)
6953                         status = try_to_fix_bad_block(root, buf, status);
6954                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
6955                         ret = -EIO;
6956                         fprintf(stderr, "bad block %llu\n",
6957                                 (unsigned long long)buf->start);
6958                 } else {
6959                         /*
6960                          * Signal to callers we need to start the scan over
6961                          * again since we'll have cowed blocks.
6962                          */
6963                         ret = -EAGAIN;
6964                 }
6965         } else {
6966                 rec->content_checked = 1;
6967                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6968                         rec->owner_ref_checked = 1;
6969                 else {
6970                         ret = check_owner_ref(root, rec, buf);
6971                         if (!ret)
6972                                 rec->owner_ref_checked = 1;
6973                 }
6974         }
6975         if (!ret)
6976                 maybe_free_extent_rec(extent_cache, rec);
6977         return ret;
6978 }
6979
#if 0
/*
 * Disabled: linear search of the rec->backrefs list for a tree backref.
 * With @parent set only full backrefs with that parent match; otherwise only
 * non-full backrefs owned by @root match.  Returns NULL when nothing matches.
 * NOTE(review): add_tree_backref() still calls find_tree_backref(), so a live
 * definition presumably exists elsewhere in this file.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs; pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;
		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
7009
7010 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7011                                                 u64 parent, u64 root)
7012 {
7013         struct tree_backref *ref = malloc(sizeof(*ref));
7014
7015         if (!ref)
7016                 return NULL;
7017         memset(&ref->node, 0, sizeof(ref->node));
7018         if (parent > 0) {
7019                 ref->parent = parent;
7020                 ref->node.full_backref = 1;
7021         } else {
7022                 ref->root = root;
7023                 ref->node.full_backref = 0;
7024         }
7025
7026         return ref;
7027 }
7028
#if 0
/*
 * Disabled: linear search of the rec->backrefs list for a data backref.
 * With @parent set only full backrefs with that parent match.  Otherwise a
 * non-full backref must match @root, @owner and @offset; when both the caller
 * and the candidate have found_ref set, @bytes and @disk_bytenr must match
 * as well.  Returns NULL when nothing matches.
 * NOTE(review): add_data_backref() still calls find_data_backref(), so a live
 * definition presumably exists elsewhere in this file.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs; pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;
		back = to_data_backref(node);

		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		/* Both sides claim a real ref: size and location must agree */
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes || back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
7067
7068 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7069                                                 u64 parent, u64 root,
7070                                                 u64 owner, u64 offset,
7071                                                 u64 max_size)
7072 {
7073         struct data_backref *ref = malloc(sizeof(*ref));
7074
7075         if (!ref)
7076                 return NULL;
7077         memset(&ref->node, 0, sizeof(ref->node));
7078         ref->node.is_data = 1;
7079
7080         if (parent > 0) {
7081                 ref->parent = parent;
7082                 ref->owner = 0;
7083                 ref->offset = 0;
7084                 ref->node.full_backref = 1;
7085         } else {
7086                 ref->root = root;
7087                 ref->owner = owner;
7088                 ref->offset = offset;
7089                 ref->node.full_backref = 0;
7090         }
7091         ref->bytes = max_size;
7092         ref->found_ref = 0;
7093         ref->num_refs = 0;
7094         if (max_size > rec->max_size)
7095                 rec->max_size = max_size;
7096         return ref;
7097 }
7098
7099 /* Check if the type of extent matches with its chunk */
7100 static void check_extent_type(struct extent_record *rec)
7101 {
7102         struct btrfs_block_group_cache *bg_cache;
7103
7104         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7105         if (!bg_cache)
7106                 return;
7107
7108         /* data extent, check chunk directly*/
7109         if (!rec->metadata) {
7110                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7111                         rec->wrong_chunk_type = 1;
7112                 return;
7113         }
7114
7115         /* metadata extent, check the obvious case first */
7116         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7117                                  BTRFS_BLOCK_GROUP_METADATA))) {
7118                 rec->wrong_chunk_type = 1;
7119                 return;
7120         }
7121
7122         /*
7123          * Check SYSTEM extent, as it's also marked as metadata, we can only
7124          * make sure it's a SYSTEM extent by its backref
7125          */
7126         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7127                 struct extent_backref *node;
7128                 struct tree_backref *tback;
7129                 u64 bg_type;
7130
7131                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7132                 if (node->is_data) {
7133                         /* tree block shouldn't have data backref */
7134                         rec->wrong_chunk_type = 1;
7135                         return;
7136                 }
7137                 tback = container_of(node, struct tree_backref, node);
7138
7139                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7140                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7141                 else
7142                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7143                 if (!(bg_cache->flags & bg_type))
7144                         rec->wrong_chunk_type = 1;
7145         }
7146 }
7147
7148 /*
7149  * Allocate a new extent record, fill default values from @tmpl and insert int
7150  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7151  * the cache, otherwise it fails.
7152  */
7153 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7154                 struct extent_record *tmpl)
7155 {
7156         struct extent_record *rec;
7157         int ret = 0;
7158
7159         BUG_ON(tmpl->max_size == 0);
7160         rec = malloc(sizeof(*rec));
7161         if (!rec)
7162                 return -ENOMEM;
7163         rec->start = tmpl->start;
7164         rec->max_size = tmpl->max_size;
7165         rec->nr = max(tmpl->nr, tmpl->max_size);
7166         rec->found_rec = tmpl->found_rec;
7167         rec->content_checked = tmpl->content_checked;
7168         rec->owner_ref_checked = tmpl->owner_ref_checked;
7169         rec->num_duplicates = 0;
7170         rec->metadata = tmpl->metadata;
7171         rec->flag_block_full_backref = FLAG_UNSET;
7172         rec->bad_full_backref = 0;
7173         rec->crossing_stripes = 0;
7174         rec->wrong_chunk_type = 0;
7175         rec->is_root = tmpl->is_root;
7176         rec->refs = tmpl->refs;
7177         rec->extent_item_refs = tmpl->extent_item_refs;
7178         rec->parent_generation = tmpl->parent_generation;
7179         INIT_LIST_HEAD(&rec->backrefs);
7180         INIT_LIST_HEAD(&rec->dups);
7181         INIT_LIST_HEAD(&rec->list);
7182         rec->backref_tree = RB_ROOT;
7183         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7184         rec->cache.start = tmpl->start;
7185         rec->cache.size = tmpl->nr;
7186         ret = insert_cache_extent(extent_cache, &rec->cache);
7187         if (ret) {
7188                 free(rec);
7189                 return ret;
7190         }
7191         bytes_used += rec->nr;
7192
7193         if (tmpl->metadata)
7194                 rec->crossing_stripes = check_crossing_stripes(global_info,
7195                                 rec->start, global_info->nodesize);
7196         check_extent_type(rec);
7197         return ret;
7198 }
7199
7200 /*
7201  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7202  * some are hints:
7203  * - refs              - if found, increase refs
7204  * - is_root           - if found, set
7205  * - content_checked   - if found, set
7206  * - owner_ref_checked - if found, set
7207  *
7208  * If not found, create a new one, initialize and insert.
7209  */
7210 static int add_extent_rec(struct cache_tree *extent_cache,
7211                 struct extent_record *tmpl)
7212 {
7213         struct extent_record *rec;
7214         struct cache_extent *cache;
7215         int ret = 0;
7216         int dup = 0;
7217
7218         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7219         if (cache) {
7220                 rec = container_of(cache, struct extent_record, cache);
7221                 if (tmpl->refs)
7222                         rec->refs++;
7223                 if (rec->nr == 1)
7224                         rec->nr = max(tmpl->nr, tmpl->max_size);
7225
7226                 /*
7227                  * We need to make sure to reset nr to whatever the extent
7228                  * record says was the real size, this way we can compare it to
7229                  * the backrefs.
7230                  */
7231                 if (tmpl->found_rec) {
7232                         if (tmpl->start != rec->start || rec->found_rec) {
7233                                 struct extent_record *tmp;
7234
7235                                 dup = 1;
7236                                 if (list_empty(&rec->list))
7237                                         list_add_tail(&rec->list,
7238                                                       &duplicate_extents);
7239
7240                                 /*
7241                                  * We have to do this song and dance in case we
7242                                  * find an extent record that falls inside of
7243                                  * our current extent record but does not have
7244                                  * the same objectid.
7245                                  */
7246                                 tmp = malloc(sizeof(*tmp));
7247                                 if (!tmp)
7248                                         return -ENOMEM;
7249                                 tmp->start = tmpl->start;
7250                                 tmp->max_size = tmpl->max_size;
7251                                 tmp->nr = tmpl->nr;
7252                                 tmp->found_rec = 1;
7253                                 tmp->metadata = tmpl->metadata;
7254                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7255                                 INIT_LIST_HEAD(&tmp->list);
7256                                 list_add_tail(&tmp->list, &rec->dups);
7257                                 rec->num_duplicates++;
7258                         } else {
7259                                 rec->nr = tmpl->nr;
7260                                 rec->found_rec = 1;
7261                         }
7262                 }
7263
7264                 if (tmpl->extent_item_refs && !dup) {
7265                         if (rec->extent_item_refs) {
7266                                 fprintf(stderr, "block %llu rec "
7267                                         "extent_item_refs %llu, passed %llu\n",
7268                                         (unsigned long long)tmpl->start,
7269                                         (unsigned long long)
7270                                                         rec->extent_item_refs,
7271                                         (unsigned long long)tmpl->extent_item_refs);
7272                         }
7273                         rec->extent_item_refs = tmpl->extent_item_refs;
7274                 }
7275                 if (tmpl->is_root)
7276                         rec->is_root = 1;
7277                 if (tmpl->content_checked)
7278                         rec->content_checked = 1;
7279                 if (tmpl->owner_ref_checked)
7280                         rec->owner_ref_checked = 1;
7281                 memcpy(&rec->parent_key, &tmpl->parent_key,
7282                                 sizeof(tmpl->parent_key));
7283                 if (tmpl->parent_generation)
7284                         rec->parent_generation = tmpl->parent_generation;
7285                 if (rec->max_size < tmpl->max_size)
7286                         rec->max_size = tmpl->max_size;
7287
7288                 /*
7289                  * A metadata extent can't cross stripe_len boundary, otherwise
7290                  * kernel scrub won't be able to handle it.
7291                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7292                  * it.
7293                  */
7294                 if (tmpl->metadata)
7295                         rec->crossing_stripes = check_crossing_stripes(
7296                                         global_info, rec->start,
7297                                         global_info->nodesize);
7298                 check_extent_type(rec);
7299                 maybe_free_extent_rec(extent_cache, rec);
7300                 return ret;
7301         }
7302
7303         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7304
7305         return ret;
7306 }
7307
7308 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7309                             u64 parent, u64 root, int found_ref)
7310 {
7311         struct extent_record *rec;
7312         struct tree_backref *back;
7313         struct cache_extent *cache;
7314         int ret;
7315         bool insert = false;
7316
7317         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7318         if (!cache) {
7319                 struct extent_record tmpl;
7320
7321                 memset(&tmpl, 0, sizeof(tmpl));
7322                 tmpl.start = bytenr;
7323                 tmpl.nr = 1;
7324                 tmpl.metadata = 1;
7325                 tmpl.max_size = 1;
7326
7327                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7328                 if (ret)
7329                         return ret;
7330
7331                 /* really a bug in cache_extent implement now */
7332                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7333                 if (!cache)
7334                         return -ENOENT;
7335         }
7336
7337         rec = container_of(cache, struct extent_record, cache);
7338         if (rec->start != bytenr) {
7339                 /*
7340                  * Several cause, from unaligned bytenr to over lapping extents
7341                  */
7342                 return -EEXIST;
7343         }
7344
7345         back = find_tree_backref(rec, parent, root);
7346         if (!back) {
7347                 back = alloc_tree_backref(rec, parent, root);
7348                 if (!back)
7349                         return -ENOMEM;
7350                 insert = true;
7351         }
7352
7353         if (found_ref) {
7354                 if (back->node.found_ref) {
7355                         fprintf(stderr, "Extent back ref already exists "
7356                                 "for %llu parent %llu root %llu \n",
7357                                 (unsigned long long)bytenr,
7358                                 (unsigned long long)parent,
7359                                 (unsigned long long)root);
7360                 }
7361                 back->node.found_ref = 1;
7362         } else {
7363                 if (back->node.found_extent_tree) {
7364                         fprintf(stderr, "Extent back ref already exists "
7365                                 "for %llu parent %llu root %llu \n",
7366                                 (unsigned long long)bytenr,
7367                                 (unsigned long long)parent,
7368                                 (unsigned long long)root);
7369                 }
7370                 back->node.found_extent_tree = 1;
7371         }
7372         if (insert)
7373                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7374                         compare_extent_backref));
7375         check_extent_type(rec);
7376         maybe_free_extent_rec(extent_cache, rec);
7377         return 0;
7378 }
7379
7380 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7381                             u64 parent, u64 root, u64 owner, u64 offset,
7382                             u32 num_refs, int found_ref, u64 max_size)
7383 {
7384         struct extent_record *rec;
7385         struct data_backref *back;
7386         struct cache_extent *cache;
7387         int ret;
7388         bool insert = false;
7389
7390         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7391         if (!cache) {
7392                 struct extent_record tmpl;
7393
7394                 memset(&tmpl, 0, sizeof(tmpl));
7395                 tmpl.start = bytenr;
7396                 tmpl.nr = 1;
7397                 tmpl.max_size = max_size;
7398
7399                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7400                 if (ret)
7401                         return ret;
7402
7403                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7404                 if (!cache)
7405                         abort();
7406         }
7407
7408         rec = container_of(cache, struct extent_record, cache);
7409         if (rec->max_size < max_size)
7410                 rec->max_size = max_size;
7411
7412         /*
7413          * If found_ref is set then max_size is the real size and must match the
7414          * existing refs.  So if we have already found a ref then we need to
7415          * make sure that this ref matches the existing one, otherwise we need
7416          * to add a new backref so we can notice that the backrefs don't match
7417          * and we need to figure out who is telling the truth.  This is to
7418          * account for that awful fsync bug I introduced where we'd end up with
7419          * a btrfs_file_extent_item that would have its length include multiple
7420          * prealloc extents or point inside of a prealloc extent.
7421          */
7422         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7423                                  bytenr, max_size);
7424         if (!back) {
7425                 back = alloc_data_backref(rec, parent, root, owner, offset,
7426                                           max_size);
7427                 BUG_ON(!back);
7428                 insert = true;
7429         }
7430
7431         if (found_ref) {
7432                 BUG_ON(num_refs != 1);
7433                 if (back->node.found_ref)
7434                         BUG_ON(back->bytes != max_size);
7435                 back->node.found_ref = 1;
7436                 back->found_ref += 1;
7437                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7438                         back->bytes = max_size;
7439                         back->disk_bytenr = bytenr;
7440
7441                         /* Need to reinsert if not already in the tree */
7442                         if (!insert) {
7443                                 rb_erase(&back->node.node, &rec->backref_tree);
7444                                 insert = true;
7445                         }
7446                 }
7447                 rec->refs += 1;
7448                 rec->content_checked = 1;
7449                 rec->owner_ref_checked = 1;
7450         } else {
7451                 if (back->node.found_extent_tree) {
7452                         fprintf(stderr, "Extent back ref already exists "
7453                                 "for %llu parent %llu root %llu "
7454                                 "owner %llu offset %llu num_refs %lu\n",
7455                                 (unsigned long long)bytenr,
7456                                 (unsigned long long)parent,
7457                                 (unsigned long long)root,
7458                                 (unsigned long long)owner,
7459                                 (unsigned long long)offset,
7460                                 (unsigned long)num_refs);
7461                 }
7462                 back->num_refs = num_refs;
7463                 back->node.found_extent_tree = 1;
7464         }
7465         if (insert)
7466                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7467                         compare_extent_backref));
7468
7469         maybe_free_extent_rec(extent_cache, rec);
7470         return 0;
7471 }
7472
7473 static int add_pending(struct cache_tree *pending,
7474                        struct cache_tree *seen, u64 bytenr, u32 size)
7475 {
7476         int ret;
7477         ret = add_cache_extent(seen, bytenr, size);
7478         if (ret)
7479                 return ret;
7480         add_cache_extent(pending, bytenr, size);
7481         return 0;
7482 }
7483
7484 static int pick_next_pending(struct cache_tree *pending,
7485                         struct cache_tree *reada,
7486                         struct cache_tree *nodes,
7487                         u64 last, struct block_info *bits, int bits_nr,
7488                         int *reada_bits)
7489 {
7490         unsigned long node_start = last;
7491         struct cache_extent *cache;
7492         int ret;
7493
7494         cache = search_cache_extent(reada, 0);
7495         if (cache) {
7496                 bits[0].start = cache->start;
7497                 bits[0].size = cache->size;
7498                 *reada_bits = 1;
7499                 return 1;
7500         }
7501         *reada_bits = 0;
7502         if (node_start > 32768)
7503                 node_start -= 32768;
7504
7505         cache = search_cache_extent(nodes, node_start);
7506         if (!cache)
7507                 cache = search_cache_extent(nodes, 0);
7508
7509         if (!cache) {
7510                  cache = search_cache_extent(pending, 0);
7511                  if (!cache)
7512                          return 0;
7513                  ret = 0;
7514                  do {
7515                          bits[ret].start = cache->start;
7516                          bits[ret].size = cache->size;
7517                          cache = next_cache_extent(cache);
7518                          ret++;
7519                  } while (cache && ret < bits_nr);
7520                  return ret;
7521         }
7522
7523         ret = 0;
7524         do {
7525                 bits[ret].start = cache->start;
7526                 bits[ret].size = cache->size;
7527                 cache = next_cache_extent(cache);
7528                 ret++;
7529         } while (cache && ret < bits_nr);
7530
7531         if (bits_nr - ret > 8) {
7532                 u64 lookup = bits[0].start + bits[0].size;
7533                 struct cache_extent *next;
7534                 next = search_cache_extent(pending, lookup);
7535                 while(next) {
7536                         if (next->start - lookup > 32768)
7537                                 break;
7538                         bits[ret].start = next->start;
7539                         bits[ret].size = next->size;
7540                         lookup = next->start + next->size;
7541                         ret++;
7542                         if (ret == bits_nr)
7543                                 break;
7544                         next = next_cache_extent(next);
7545                         if (!next)
7546                                 break;
7547                 }
7548         }
7549         return ret;
7550 }
7551
7552 static void free_chunk_record(struct cache_extent *cache)
7553 {
7554         struct chunk_record *rec;
7555
7556         rec = container_of(cache, struct chunk_record, cache);
7557         list_del_init(&rec->list);
7558         list_del_init(&rec->dextents);
7559         free(rec);
7560 }
7561
7562 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7563 {
7564         cache_tree_free_extents(chunk_cache, free_chunk_record);
7565 }
7566
7567 static void free_device_record(struct rb_node *node)
7568 {
7569         struct device_record *rec;
7570
7571         rec = container_of(node, struct device_record, node);
7572         free(rec);
7573 }
7574
7575 FREE_RB_BASED_TREE(device_cache, free_device_record);
7576
7577 int insert_block_group_record(struct block_group_tree *tree,
7578                               struct block_group_record *bg_rec)
7579 {
7580         int ret;
7581
7582         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7583         if (ret)
7584                 return ret;
7585
7586         list_add_tail(&bg_rec->list, &tree->block_groups);
7587         return 0;
7588 }
7589
7590 static void free_block_group_record(struct cache_extent *cache)
7591 {
7592         struct block_group_record *rec;
7593
7594         rec = container_of(cache, struct block_group_record, cache);
7595         list_del_init(&rec->list);
7596         free(rec);
7597 }
7598
7599 void free_block_group_tree(struct block_group_tree *tree)
7600 {
7601         cache_tree_free_extents(&tree->tree, free_block_group_record);
7602 }
7603
7604 int insert_device_extent_record(struct device_extent_tree *tree,
7605                                 struct device_extent_record *de_rec)
7606 {
7607         int ret;
7608
7609         /*
7610          * Device extent is a bit different from the other extents, because
7611          * the extents which belong to the different devices may have the
7612          * same start and size, so we need use the special extent cache
7613          * search/insert functions.
7614          */
7615         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7616         if (ret)
7617                 return ret;
7618
7619         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7620         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7621         return 0;
7622 }
7623
7624 static void free_device_extent_record(struct cache_extent *cache)
7625 {
7626         struct device_extent_record *rec;
7627
7628         rec = container_of(cache, struct device_extent_record, cache);
7629         if (!list_empty(&rec->chunk_list))
7630                 list_del_init(&rec->chunk_list);
7631         if (!list_empty(&rec->device_list))
7632                 list_del_init(&rec->device_list);
7633         free(rec);
7634 }
7635
7636 void free_device_extent_tree(struct device_extent_tree *tree)
7637 {
7638         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7639 }
7640
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Turn the v0 extent ref at @slot of @leaf into a backref.  The item key's
 * objectid is used as the extent bytenr and its offset as the parent.  A ref
 * objectid below BTRFS_FIRST_FREE_OBJECTID yields a tree backref, anything
 * else a data backref carrying the v0 ref count.
 *
 * Returns the result of add_tree_backref() or add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
				 struct extent_buffer *leaf, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

	if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
		return add_data_backref(extent_cache, key.objectid, key.offset,
				0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

	return add_tree_backref(extent_cache, key.objectid, key.offset,
			0, 0);
}
#endif
7661
7662 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7663                                             struct btrfs_key *key,
7664                                             int slot)
7665 {
7666         struct btrfs_chunk *ptr;
7667         struct chunk_record *rec;
7668         int num_stripes, i;
7669
7670         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7671         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7672
7673         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7674         if (!rec) {
7675                 fprintf(stderr, "memory allocation failed\n");
7676                 exit(-1);
7677         }
7678
7679         INIT_LIST_HEAD(&rec->list);
7680         INIT_LIST_HEAD(&rec->dextents);
7681         rec->bg_rec = NULL;
7682
7683         rec->cache.start = key->offset;
7684         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7685
7686         rec->generation = btrfs_header_generation(leaf);
7687
7688         rec->objectid = key->objectid;
7689         rec->type = key->type;
7690         rec->offset = key->offset;
7691
7692         rec->length = rec->cache.size;
7693         rec->owner = btrfs_chunk_owner(leaf, ptr);
7694         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7695         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7696         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7697         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7698         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7699         rec->num_stripes = num_stripes;
7700         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7701
7702         for (i = 0; i < rec->num_stripes; ++i) {
7703                 rec->stripes[i].devid =
7704                         btrfs_stripe_devid_nr(leaf, ptr, i);
7705                 rec->stripes[i].offset =
7706                         btrfs_stripe_offset_nr(leaf, ptr, i);
7707                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7708                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7709                                 BTRFS_UUID_SIZE);
7710         }
7711
7712         return rec;
7713 }
7714
7715 static int process_chunk_item(struct cache_tree *chunk_cache,
7716                               struct btrfs_key *key, struct extent_buffer *eb,
7717                               int slot)
7718 {
7719         struct chunk_record *rec;
7720         struct btrfs_chunk *chunk;
7721         int ret = 0;
7722
7723         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7724         /*
7725          * Do extra check for this chunk item,
7726          *
7727          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7728          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7729          * and owner<->key_type check.
7730          */
7731         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7732                                       key->offset);
7733         if (ret < 0) {
7734                 error("chunk(%llu, %llu) is not valid, ignore it",
7735                       key->offset, btrfs_chunk_length(eb, chunk));
7736                 return 0;
7737         }
7738         rec = btrfs_new_chunk_record(eb, key, slot);
7739         ret = insert_cache_extent(chunk_cache, &rec->cache);
7740         if (ret) {
7741                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7742                         rec->offset, rec->length);
7743                 free(rec);
7744         }
7745
7746         return ret;
7747 }
7748
7749 static int process_device_item(struct rb_root *dev_cache,
7750                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7751 {
7752         struct btrfs_dev_item *ptr;
7753         struct device_record *rec;
7754         int ret = 0;
7755
7756         ptr = btrfs_item_ptr(eb,
7757                 slot, struct btrfs_dev_item);
7758
7759         rec = malloc(sizeof(*rec));
7760         if (!rec) {
7761                 fprintf(stderr, "memory allocation failed\n");
7762                 return -ENOMEM;
7763         }
7764
7765         rec->devid = key->offset;
7766         rec->generation = btrfs_header_generation(eb);
7767
7768         rec->objectid = key->objectid;
7769         rec->type = key->type;
7770         rec->offset = key->offset;
7771
7772         rec->devid = btrfs_device_id(eb, ptr);
7773         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7774         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7775
7776         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7777         if (ret) {
7778                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7779                 free(rec);
7780         }
7781
7782         return ret;
7783 }
7784
7785 struct block_group_record *
7786 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7787                              int slot)
7788 {
7789         struct btrfs_block_group_item *ptr;
7790         struct block_group_record *rec;
7791
7792         rec = calloc(1, sizeof(*rec));
7793         if (!rec) {
7794                 fprintf(stderr, "memory allocation failed\n");
7795                 exit(-1);
7796         }
7797
7798         rec->cache.start = key->objectid;
7799         rec->cache.size = key->offset;
7800
7801         rec->generation = btrfs_header_generation(leaf);
7802
7803         rec->objectid = key->objectid;
7804         rec->type = key->type;
7805         rec->offset = key->offset;
7806
7807         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7808         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7809
7810         INIT_LIST_HEAD(&rec->list);
7811
7812         return rec;
7813 }
7814
7815 static int process_block_group_item(struct block_group_tree *block_group_cache,
7816                                     struct btrfs_key *key,
7817                                     struct extent_buffer *eb, int slot)
7818 {
7819         struct block_group_record *rec;
7820         int ret = 0;
7821
7822         rec = btrfs_new_block_group_record(eb, key, slot);
7823         ret = insert_block_group_record(block_group_cache, rec);
7824         if (ret) {
7825                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7826                         rec->objectid, rec->offset);
7827                 free(rec);
7828         }
7829
7830         return ret;
7831 }
7832
7833 struct device_extent_record *
7834 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7835                                struct btrfs_key *key, int slot)
7836 {
7837         struct device_extent_record *rec;
7838         struct btrfs_dev_extent *ptr;
7839
7840         rec = calloc(1, sizeof(*rec));
7841         if (!rec) {
7842                 fprintf(stderr, "memory allocation failed\n");
7843                 exit(-1);
7844         }
7845
7846         rec->cache.objectid = key->objectid;
7847         rec->cache.start = key->offset;
7848
7849         rec->generation = btrfs_header_generation(leaf);
7850
7851         rec->objectid = key->objectid;
7852         rec->type = key->type;
7853         rec->offset = key->offset;
7854
7855         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7856         rec->chunk_objecteid =
7857                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7858         rec->chunk_offset =
7859                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7860         rec->length = btrfs_dev_extent_length(leaf, ptr);
7861         rec->cache.size = rec->length;
7862
7863         INIT_LIST_HEAD(&rec->chunk_list);
7864         INIT_LIST_HEAD(&rec->device_list);
7865
7866         return rec;
7867 }
7868
7869 static int
7870 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7871                            struct btrfs_key *key, struct extent_buffer *eb,
7872                            int slot)
7873 {
7874         struct device_extent_record *rec;
7875         int ret;
7876
7877         rec = btrfs_new_device_extent_record(eb, key, slot);
7878         ret = insert_device_extent_record(dev_extent_cache, rec);
7879         if (ret) {
7880                 fprintf(stderr,
7881                         "Device extent[%llu, %llu, %llu] existed.\n",
7882                         rec->objectid, rec->offset, rec->length);
7883                 free(rec);
7884         }
7885
7886         return ret;
7887 }
7888
7889 static int process_extent_item(struct btrfs_root *root,
7890                                struct cache_tree *extent_cache,
7891                                struct extent_buffer *eb, int slot)
7892 {
7893         struct btrfs_extent_item *ei;
7894         struct btrfs_extent_inline_ref *iref;
7895         struct btrfs_extent_data_ref *dref;
7896         struct btrfs_shared_data_ref *sref;
7897         struct btrfs_key key;
7898         struct extent_record tmpl;
7899         unsigned long end;
7900         unsigned long ptr;
7901         int ret;
7902         int type;
7903         u32 item_size = btrfs_item_size_nr(eb, slot);
7904         u64 refs = 0;
7905         u64 offset;
7906         u64 num_bytes;
7907         int metadata = 0;
7908
7909         btrfs_item_key_to_cpu(eb, &key, slot);
7910
7911         if (key.type == BTRFS_METADATA_ITEM_KEY) {
7912                 metadata = 1;
7913                 num_bytes = root->fs_info->nodesize;
7914         } else {
7915                 num_bytes = key.offset;
7916         }
7917
7918         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
7919                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
7920                       key.objectid, root->fs_info->sectorsize);
7921                 return -EIO;
7922         }
7923         if (item_size < sizeof(*ei)) {
7924 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
7925                 struct btrfs_extent_item_v0 *ei0;
7926                 if (item_size != sizeof(*ei0)) {
7927                         error(
7928         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
7929                                 key.objectid, key.type, key.offset,
7930                                 btrfs_header_bytenr(eb), slot);
7931                         BUG();
7932                 }
7933                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
7934                 refs = btrfs_extent_refs_v0(eb, ei0);
7935 #else
7936                 BUG();
7937 #endif
7938                 memset(&tmpl, 0, sizeof(tmpl));
7939                 tmpl.start = key.objectid;
7940                 tmpl.nr = num_bytes;
7941                 tmpl.extent_item_refs = refs;
7942                 tmpl.metadata = metadata;
7943                 tmpl.found_rec = 1;
7944                 tmpl.max_size = num_bytes;
7945
7946                 return add_extent_rec(extent_cache, &tmpl);
7947         }
7948
7949         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
7950         refs = btrfs_extent_refs(eb, ei);
7951         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
7952                 metadata = 1;
7953         else
7954                 metadata = 0;
7955         if (metadata && num_bytes != root->fs_info->nodesize) {
7956                 error("ignore invalid metadata extent, length %llu does not equal to %u",
7957                       num_bytes, root->fs_info->nodesize);
7958                 return -EIO;
7959         }
7960         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
7961                 error("ignore invalid data extent, length %llu is not aligned to %u",
7962                       num_bytes, root->fs_info->sectorsize);
7963                 return -EIO;
7964         }
7965
7966         memset(&tmpl, 0, sizeof(tmpl));
7967         tmpl.start = key.objectid;
7968         tmpl.nr = num_bytes;
7969         tmpl.extent_item_refs = refs;
7970         tmpl.metadata = metadata;
7971         tmpl.found_rec = 1;
7972         tmpl.max_size = num_bytes;
7973         add_extent_rec(extent_cache, &tmpl);
7974
7975         ptr = (unsigned long)(ei + 1);
7976         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
7977             key.type == BTRFS_EXTENT_ITEM_KEY)
7978                 ptr += sizeof(struct btrfs_tree_block_info);
7979
7980         end = (unsigned long)ei + item_size;
7981         while (ptr < end) {
7982                 iref = (struct btrfs_extent_inline_ref *)ptr;
7983                 type = btrfs_extent_inline_ref_type(eb, iref);
7984                 offset = btrfs_extent_inline_ref_offset(eb, iref);
7985                 switch (type) {
7986                 case BTRFS_TREE_BLOCK_REF_KEY:
7987                         ret = add_tree_backref(extent_cache, key.objectid,
7988                                         0, offset, 0);
7989                         if (ret < 0)
7990                                 error(
7991                         "add_tree_backref failed (extent items tree block): %s",
7992                                       strerror(-ret));
7993                         break;
7994                 case BTRFS_SHARED_BLOCK_REF_KEY:
7995                         ret = add_tree_backref(extent_cache, key.objectid,
7996                                         offset, 0, 0);
7997                         if (ret < 0)
7998                                 error(
7999                         "add_tree_backref failed (extent items shared block): %s",
8000                                       strerror(-ret));
8001                         break;
8002                 case BTRFS_EXTENT_DATA_REF_KEY:
8003                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8004                         add_data_backref(extent_cache, key.objectid, 0,
8005                                         btrfs_extent_data_ref_root(eb, dref),
8006                                         btrfs_extent_data_ref_objectid(eb,
8007                                                                        dref),
8008                                         btrfs_extent_data_ref_offset(eb, dref),
8009                                         btrfs_extent_data_ref_count(eb, dref),
8010                                         0, num_bytes);
8011                         break;
8012                 case BTRFS_SHARED_DATA_REF_KEY:
8013                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8014                         add_data_backref(extent_cache, key.objectid, offset,
8015                                         0, 0, 0,
8016                                         btrfs_shared_data_ref_count(eb, sref),
8017                                         0, num_bytes);
8018                         break;
8019                 default:
8020                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8021                                 key.objectid, key.type, num_bytes);
8022                         goto out;
8023                 }
8024                 ptr += btrfs_extent_inline_ref_size(type);
8025         }
8026         WARN_ON(ptr > end);
8027 out:
8028         return 0;
8029 }
8030
8031 static int check_cache_range(struct btrfs_root *root,
8032                              struct btrfs_block_group_cache *cache,
8033                              u64 offset, u64 bytes)
8034 {
8035         struct btrfs_free_space *entry;
8036         u64 *logical;
8037         u64 bytenr;
8038         int stripe_len;
8039         int i, nr, ret;
8040
8041         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8042                 bytenr = btrfs_sb_offset(i);
8043                 ret = btrfs_rmap_block(root->fs_info,
8044                                        cache->key.objectid, bytenr, 0,
8045                                        &logical, &nr, &stripe_len);
8046                 if (ret)
8047                         return ret;
8048
8049                 while (nr--) {
8050                         if (logical[nr] + stripe_len <= offset)
8051                                 continue;
8052                         if (offset + bytes <= logical[nr])
8053                                 continue;
8054                         if (logical[nr] == offset) {
8055                                 if (stripe_len >= bytes) {
8056                                         free(logical);
8057                                         return 0;
8058                                 }
8059                                 bytes -= stripe_len;
8060                                 offset += stripe_len;
8061                         } else if (logical[nr] < offset) {
8062                                 if (logical[nr] + stripe_len >=
8063                                     offset + bytes) {
8064                                         free(logical);
8065                                         return 0;
8066                                 }
8067                                 bytes = (offset + bytes) -
8068                                         (logical[nr] + stripe_len);
8069                                 offset = logical[nr] + stripe_len;
8070                         } else {
8071                                 /*
8072                                  * Could be tricky, the super may land in the
8073                                  * middle of the area we're checking.  First
8074                                  * check the easiest case, it's at the end.
8075                                  */
8076                                 if (logical[nr] + stripe_len >=
8077                                     bytes + offset) {
8078                                         bytes = logical[nr] - offset;
8079                                         continue;
8080                                 }
8081
8082                                 /* Check the left side */
8083                                 ret = check_cache_range(root, cache,
8084                                                         offset,
8085                                                         logical[nr] - offset);
8086                                 if (ret) {
8087                                         free(logical);
8088                                         return ret;
8089                                 }
8090
8091                                 /* Now we continue with the right side */
8092                                 bytes = (offset + bytes) -
8093                                         (logical[nr] + stripe_len);
8094                                 offset = logical[nr] + stripe_len;
8095                         }
8096                 }
8097
8098                 free(logical);
8099         }
8100
8101         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8102         if (!entry) {
8103                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8104                         offset, offset+bytes);
8105                 return -EINVAL;
8106         }
8107
8108         if (entry->offset != offset) {
8109                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8110                         entry->offset);
8111                 return -EINVAL;
8112         }
8113
8114         if (entry->bytes != bytes) {
8115                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8116                         bytes, entry->bytes, offset);
8117                 return -EINVAL;
8118         }
8119
8120         unlink_free_space(cache->free_space_ctl, entry);
8121         free(entry);
8122         return 0;
8123 }
8124
8125 static int verify_space_cache(struct btrfs_root *root,
8126                               struct btrfs_block_group_cache *cache)
8127 {
8128         struct btrfs_path path;
8129         struct extent_buffer *leaf;
8130         struct btrfs_key key;
8131         u64 last;
8132         int ret = 0;
8133
8134         root = root->fs_info->extent_root;
8135
8136         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8137
8138         btrfs_init_path(&path);
8139         key.objectid = last;
8140         key.offset = 0;
8141         key.type = BTRFS_EXTENT_ITEM_KEY;
8142         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8143         if (ret < 0)
8144                 goto out;
8145         ret = 0;
8146         while (1) {
8147                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8148                         ret = btrfs_next_leaf(root, &path);
8149                         if (ret < 0)
8150                                 goto out;
8151                         if (ret > 0) {
8152                                 ret = 0;
8153                                 break;
8154                         }
8155                 }
8156                 leaf = path.nodes[0];
8157                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8158                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8159                         break;
8160                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8161                     key.type != BTRFS_METADATA_ITEM_KEY) {
8162                         path.slots[0]++;
8163                         continue;
8164                 }
8165
8166                 if (last == key.objectid) {
8167                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8168                                 last = key.objectid + key.offset;
8169                         else
8170                                 last = key.objectid + root->fs_info->nodesize;
8171                         path.slots[0]++;
8172                         continue;
8173                 }
8174
8175                 ret = check_cache_range(root, cache, last,
8176                                         key.objectid - last);
8177                 if (ret)
8178                         break;
8179                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8180                         last = key.objectid + key.offset;
8181                 else
8182                         last = key.objectid + root->fs_info->nodesize;
8183                 path.slots[0]++;
8184         }
8185
8186         if (last < cache->key.objectid + cache->key.offset)
8187                 ret = check_cache_range(root, cache, last,
8188                                         cache->key.objectid +
8189                                         cache->key.offset - last);
8190
8191 out:
8192         btrfs_release_path(&path);
8193
8194         if (!ret &&
8195             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8196                 fprintf(stderr, "There are still entries left in the space "
8197                         "cache\n");
8198                 ret = -EINVAL;
8199         }
8200
8201         return ret;
8202 }
8203
8204 static int check_space_cache(struct btrfs_root *root)
8205 {
8206         struct btrfs_block_group_cache *cache;
8207         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8208         int ret;
8209         int error = 0;
8210
8211         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8212             btrfs_super_generation(root->fs_info->super_copy) !=
8213             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8214                 printf("cache and super generation don't match, space cache "
8215                        "will be invalidated\n");
8216                 return 0;
8217         }
8218
8219         if (ctx.progress_enabled) {
8220                 ctx.tp = TASK_FREE_SPACE;
8221                 task_start(ctx.info);
8222         }
8223
8224         while (1) {
8225                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8226                 if (!cache)
8227                         break;
8228
8229                 start = cache->key.objectid + cache->key.offset;
8230                 if (!cache->free_space_ctl) {
8231                         if (btrfs_init_free_space_ctl(cache,
8232                                                 root->fs_info->sectorsize)) {
8233                                 ret = -ENOMEM;
8234                                 break;
8235                         }
8236                 } else {
8237                         btrfs_remove_free_space_cache(cache);
8238                 }
8239
8240                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8241                         ret = exclude_super_stripes(root, cache);
8242                         if (ret) {
8243                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8244                                         strerror(-ret));
8245                                 error++;
8246                                 continue;
8247                         }
8248                         ret = load_free_space_tree(root->fs_info, cache);
8249                         free_excluded_extents(root, cache);
8250                         if (ret < 0) {
8251                                 fprintf(stderr, "could not load free space tree: %s\n",
8252                                         strerror(-ret));
8253                                 error++;
8254                                 continue;
8255                         }
8256                         error += ret;
8257                 } else {
8258                         ret = load_free_space_cache(root->fs_info, cache);
8259                         if (!ret)
8260                                 continue;
8261                 }
8262
8263                 ret = verify_space_cache(root, cache);
8264                 if (ret) {
8265                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8266                                 cache->key.objectid);
8267                         error++;
8268                 }
8269         }
8270
8271         task_stop(ctx.info);
8272
8273         return error ? -EINVAL : 0;
8274 }
8275
8276 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8277                         u64 num_bytes, unsigned long leaf_offset,
8278                         struct extent_buffer *eb) {
8279
8280         struct btrfs_fs_info *fs_info = root->fs_info;
8281         u64 offset = 0;
8282         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8283         char *data;
8284         unsigned long csum_offset;
8285         u32 csum;
8286         u32 csum_expected;
8287         u64 read_len;
8288         u64 data_checked = 0;
8289         u64 tmp;
8290         int ret = 0;
8291         int mirror;
8292         int num_copies;
8293
8294         if (num_bytes % fs_info->sectorsize)
8295                 return -EINVAL;
8296
8297         data = malloc(num_bytes);
8298         if (!data)
8299                 return -ENOMEM;
8300
8301         while (offset < num_bytes) {
8302                 mirror = 0;
8303 again:
8304                 read_len = num_bytes - offset;
8305                 /* read as much space once a time */
8306                 ret = read_extent_data(fs_info, data + offset,
8307                                 bytenr + offset, &read_len, mirror);
8308                 if (ret)
8309                         goto out;
8310                 data_checked = 0;
8311                 /* verify every 4k data's checksum */
8312                 while (data_checked < read_len) {
8313                         csum = ~(u32)0;
8314                         tmp = offset + data_checked;
8315
8316                         csum = btrfs_csum_data((char *)data + tmp,
8317                                                csum, fs_info->sectorsize);
8318                         btrfs_csum_final(csum, (u8 *)&csum);
8319
8320                         csum_offset = leaf_offset +
8321                                  tmp / fs_info->sectorsize * csum_size;
8322                         read_extent_buffer(eb, (char *)&csum_expected,
8323                                            csum_offset, csum_size);
8324                         /* try another mirror */
8325                         if (csum != csum_expected) {
8326                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8327                                                 mirror, bytenr + tmp,
8328                                                 csum, csum_expected);
8329                                 num_copies = btrfs_num_copies(root->fs_info,
8330                                                 bytenr, num_bytes);
8331                                 if (mirror < num_copies - 1) {
8332                                         mirror += 1;
8333                                         goto again;
8334                                 }
8335                         }
8336                         data_checked += fs_info->sectorsize;
8337                 }
8338                 offset += read_len;
8339         }
8340 out:
8341         free(data);
8342         return ret;
8343 }
8344
8345 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8346                                u64 num_bytes)
8347 {
8348         struct btrfs_path path;
8349         struct extent_buffer *leaf;
8350         struct btrfs_key key;
8351         int ret;
8352
8353         btrfs_init_path(&path);
8354         key.objectid = bytenr;
8355         key.type = BTRFS_EXTENT_ITEM_KEY;
8356         key.offset = (u64)-1;
8357
8358 again:
8359         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8360                                 0, 0);
8361         if (ret < 0) {
8362                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8363                 btrfs_release_path(&path);
8364                 return ret;
8365         } else if (ret) {
8366                 if (path.slots[0] > 0) {
8367                         path.slots[0]--;
8368                 } else {
8369                         ret = btrfs_prev_leaf(root, &path);
8370                         if (ret < 0) {
8371                                 goto out;
8372                         } else if (ret > 0) {
8373                                 ret = 0;
8374                                 goto out;
8375                         }
8376                 }
8377         }
8378
8379         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8380
8381         /*
8382          * Block group items come before extent items if they have the same
8383          * bytenr, so walk back one more just in case.  Dear future traveller,
8384          * first congrats on mastering time travel.  Now if it's not too much
8385          * trouble could you go back to 2006 and tell Chris to make the
8386          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8387          * EXTENT_ITEM_KEY please?
8388          */
8389         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8390                 if (path.slots[0] > 0) {
8391                         path.slots[0]--;
8392                 } else {
8393                         ret = btrfs_prev_leaf(root, &path);
8394                         if (ret < 0) {
8395                                 goto out;
8396                         } else if (ret > 0) {
8397                                 ret = 0;
8398                                 goto out;
8399                         }
8400                 }
8401                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8402         }
8403
8404         while (num_bytes) {
8405                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8406                         ret = btrfs_next_leaf(root, &path);
8407                         if (ret < 0) {
8408                                 fprintf(stderr, "Error going to next leaf "
8409                                         "%d\n", ret);
8410                                 btrfs_release_path(&path);
8411                                 return ret;
8412                         } else if (ret) {
8413                                 break;
8414                         }
8415                 }
8416                 leaf = path.nodes[0];
8417                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8418                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8419                         path.slots[0]++;
8420                         continue;
8421                 }
8422                 if (key.objectid + key.offset < bytenr) {
8423                         path.slots[0]++;
8424                         continue;
8425                 }
8426                 if (key.objectid > bytenr + num_bytes)
8427                         break;
8428
8429                 if (key.objectid == bytenr) {
8430                         if (key.offset >= num_bytes) {
8431                                 num_bytes = 0;
8432                                 break;
8433                         }
8434                         num_bytes -= key.offset;
8435                         bytenr += key.offset;
8436                 } else if (key.objectid < bytenr) {
8437                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8438                                 num_bytes = 0;
8439                                 break;
8440                         }
8441                         num_bytes = (bytenr + num_bytes) -
8442                                 (key.objectid + key.offset);
8443                         bytenr = key.objectid + key.offset;
8444                 } else {
8445                         if (key.objectid + key.offset < bytenr + num_bytes) {
8446                                 u64 new_start = key.objectid + key.offset;
8447                                 u64 new_bytes = bytenr + num_bytes - new_start;
8448
8449                                 /*
8450                                  * Weird case, the extent is in the middle of
8451                                  * our range, we'll have to search one side
8452                                  * and then the other.  Not sure if this happens
8453                                  * in real life, but no harm in coding it up
8454                                  * anyway just in case.
8455                                  */
8456                                 btrfs_release_path(&path);
8457                                 ret = check_extent_exists(root, new_start,
8458                                                           new_bytes);
8459                                 if (ret) {
8460                                         fprintf(stderr, "Right section didn't "
8461                                                 "have a record\n");
8462                                         break;
8463                                 }
8464                                 num_bytes = key.objectid - bytenr;
8465                                 goto again;
8466                         }
8467                         num_bytes = key.objectid - bytenr;
8468                 }
8469                 path.slots[0]++;
8470         }
8471         ret = 0;
8472
8473 out:
8474         if (num_bytes && !ret) {
8475                 fprintf(stderr, "There are no extents for csum range "
8476                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8477                 ret = 1;
8478         }
8479
8480         btrfs_release_path(&path);
8481         return ret;
8482 }
8483
8484 static int check_csums(struct btrfs_root *root)
8485 {
8486         struct btrfs_path path;
8487         struct extent_buffer *leaf;
8488         struct btrfs_key key;
8489         u64 offset = 0, num_bytes = 0;
8490         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8491         int errors = 0;
8492         int ret;
8493         u64 data_len;
8494         unsigned long leaf_offset;
8495
8496         root = root->fs_info->csum_root;
8497         if (!extent_buffer_uptodate(root->node)) {
8498                 fprintf(stderr, "No valid csum tree found\n");
8499                 return -ENOENT;
8500         }
8501
8502         btrfs_init_path(&path);
8503         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8504         key.type = BTRFS_EXTENT_CSUM_KEY;
8505         key.offset = 0;
8506         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8507         if (ret < 0) {
8508                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8509                 btrfs_release_path(&path);
8510                 return ret;
8511         }
8512
8513         if (ret > 0 && path.slots[0])
8514                 path.slots[0]--;
8515         ret = 0;
8516
8517         while (1) {
8518                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8519                         ret = btrfs_next_leaf(root, &path);
8520                         if (ret < 0) {
8521                                 fprintf(stderr, "Error going to next leaf "
8522                                         "%d\n", ret);
8523                                 break;
8524                         }
8525                         if (ret)
8526                                 break;
8527                 }
8528                 leaf = path.nodes[0];
8529
8530                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8531                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8532                         path.slots[0]++;
8533                         continue;
8534                 }
8535
8536                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8537                               csum_size) * root->fs_info->sectorsize;
8538                 if (!check_data_csum)
8539                         goto skip_csum_check;
8540                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8541                 ret = check_extent_csums(root, key.offset, data_len,
8542                                          leaf_offset, leaf);
8543                 if (ret)
8544                         break;
8545 skip_csum_check:
8546                 if (!num_bytes) {
8547                         offset = key.offset;
8548                 } else if (key.offset != offset + num_bytes) {
8549                         ret = check_extent_exists(root, offset, num_bytes);
8550                         if (ret) {
8551                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8552                                         "there is no extent record\n",
8553                                         offset, offset+num_bytes);
8554                                 errors++;
8555                         }
8556                         offset = key.offset;
8557                         num_bytes = 0;
8558                 }
8559                 num_bytes += data_len;
8560                 path.slots[0]++;
8561         }
8562
8563         btrfs_release_path(&path);
8564         return errors;
8565 }
8566
8567 static int is_dropped_key(struct btrfs_key *key,
8568                           struct btrfs_key *drop_key) {
8569         if (key->objectid < drop_key->objectid)
8570                 return 1;
8571         else if (key->objectid == drop_key->objectid) {
8572                 if (key->type < drop_key->type)
8573                         return 1;
8574                 else if (key->type == drop_key->type) {
8575                         if (key->offset < drop_key->offset)
8576                                 return 1;
8577                 }
8578         }
8579         return 0;
8580 }
8581
8582 /*
8583  * Here are the rules for FULL_BACKREF.
8584  *
8585  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8586  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8587  *      FULL_BACKREF set.
8588  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8589  *    if it happened after the relocation occurred since we'll have dropped the
8590  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8591  *    have no real way to know for sure.
8592  *
8593  * We process the blocks one root at a time, and we start from the lowest root
8594  * objectid and go to the highest.  So we can just lookup the owner backref for
8595  * the record and if we don't find it then we know it doesn't exist and we have
8596  * a FULL BACKREF.
8597  *
8598  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8599  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8600  * be set or not and then we can check later once we've gathered all the refs.
8601  */
8602 static int calc_extent_flag(struct cache_tree *extent_cache,
8603                            struct extent_buffer *buf,
8604                            struct root_item_record *ri,
8605                            u64 *flags)
8606 {
8607         struct extent_record *rec;
8608         struct cache_extent *cache;
8609         struct tree_backref *tback;
8610         u64 owner = 0;
8611
8612         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8613         /* we have added this extent before */
8614         if (!cache)
8615                 return -ENOENT;
8616
8617         rec = container_of(cache, struct extent_record, cache);
8618
8619         /*
8620          * Except file/reloc tree, we can not have
8621          * FULL BACKREF MODE
8622          */
8623         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8624                 goto normal;
8625         /*
8626          * root node
8627          */
8628         if (buf->start == ri->bytenr)
8629                 goto normal;
8630
8631         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8632                 goto full_backref;
8633
8634         owner = btrfs_header_owner(buf);
8635         if (owner == ri->objectid)
8636                 goto normal;
8637
8638         tback = find_tree_backref(rec, 0, owner);
8639         if (!tback)
8640                 goto full_backref;
8641 normal:
8642         *flags = 0;
8643         if (rec->flag_block_full_backref != FLAG_UNSET &&
8644             rec->flag_block_full_backref != 0)
8645                 rec->bad_full_backref = 1;
8646         return 0;
8647 full_backref:
8648         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8649         if (rec->flag_block_full_backref != FLAG_UNSET &&
8650             rec->flag_block_full_backref != 1)
8651                 rec->bad_full_backref = 1;
8652         return 0;
8653 }
8654
8655 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8656 {
8657         fprintf(stderr, "Invalid key type(");
8658         print_key_type(stderr, 0, key_type);
8659         fprintf(stderr, ") found in root(");
8660         print_objectid(stderr, rootid, 0);
8661         fprintf(stderr, ")\n");
8662 }
8663
8664 /*
8665  * Check if the key is valid with its extent buffer.
8666  *
8667  * This is a early check in case invalid key exists in a extent buffer
8668  * This is not comprehensive yet, but should prevent wrong key/item passed
8669  * further
8670  */
8671 static int check_type_with_root(u64 rootid, u8 key_type)
8672 {
8673         switch (key_type) {
8674         /* Only valid in chunk tree */
8675         case BTRFS_DEV_ITEM_KEY:
8676         case BTRFS_CHUNK_ITEM_KEY:
8677                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8678                         goto err;
8679                 break;
8680         /* valid in csum and log tree */
8681         case BTRFS_CSUM_TREE_OBJECTID:
8682                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8683                       is_fstree(rootid)))
8684                         goto err;
8685                 break;
8686         case BTRFS_EXTENT_ITEM_KEY:
8687         case BTRFS_METADATA_ITEM_KEY:
8688         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8689                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8690                         goto err;
8691                 break;
8692         case BTRFS_ROOT_ITEM_KEY:
8693                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8694                         goto err;
8695                 break;
8696         case BTRFS_DEV_EXTENT_KEY:
8697                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8698                         goto err;
8699                 break;
8700         }
8701         return 0;
8702 err:
8703         report_mismatch_key_root(key_type, rootid);
8704         return -EINVAL;
8705 }
8706
8707 static int run_next_block(struct btrfs_root *root,
8708                           struct block_info *bits,
8709                           int bits_nr,
8710                           u64 *last,
8711                           struct cache_tree *pending,
8712                           struct cache_tree *seen,
8713                           struct cache_tree *reada,
8714                           struct cache_tree *nodes,
8715                           struct cache_tree *extent_cache,
8716                           struct cache_tree *chunk_cache,
8717                           struct rb_root *dev_cache,
8718                           struct block_group_tree *block_group_cache,
8719                           struct device_extent_tree *dev_extent_cache,
8720                           struct root_item_record *ri)
8721 {
8722         struct btrfs_fs_info *fs_info = root->fs_info;
8723         struct extent_buffer *buf;
8724         struct extent_record *rec = NULL;
8725         u64 bytenr;
8726         u32 size;
8727         u64 parent;
8728         u64 owner;
8729         u64 flags;
8730         u64 ptr;
8731         u64 gen = 0;
8732         int ret = 0;
8733         int i;
8734         int nritems;
8735         struct btrfs_key key;
8736         struct cache_extent *cache;
8737         int reada_bits;
8738
8739         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8740                                     bits_nr, &reada_bits);
8741         if (nritems == 0)
8742                 return 1;
8743
8744         if (!reada_bits) {
8745                 for(i = 0; i < nritems; i++) {
8746                         ret = add_cache_extent(reada, bits[i].start,
8747                                                bits[i].size);
8748                         if (ret == -EEXIST)
8749                                 continue;
8750
8751                         /* fixme, get the parent transid */
8752                         readahead_tree_block(fs_info, bits[i].start, 0);
8753                 }
8754         }
8755         *last = bits[0].start;
8756         bytenr = bits[0].start;
8757         size = bits[0].size;
8758
8759         cache = lookup_cache_extent(pending, bytenr, size);
8760         if (cache) {
8761                 remove_cache_extent(pending, cache);
8762                 free(cache);
8763         }
8764         cache = lookup_cache_extent(reada, bytenr, size);
8765         if (cache) {
8766                 remove_cache_extent(reada, cache);
8767                 free(cache);
8768         }
8769         cache = lookup_cache_extent(nodes, bytenr, size);
8770         if (cache) {
8771                 remove_cache_extent(nodes, cache);
8772                 free(cache);
8773         }
8774         cache = lookup_cache_extent(extent_cache, bytenr, size);
8775         if (cache) {
8776                 rec = container_of(cache, struct extent_record, cache);
8777                 gen = rec->parent_generation;
8778         }
8779
8780         /* fixme, get the real parent transid */
8781         buf = read_tree_block(root->fs_info, bytenr, gen);
8782         if (!extent_buffer_uptodate(buf)) {
8783                 record_bad_block_io(root->fs_info,
8784                                     extent_cache, bytenr, size);
8785                 goto out;
8786         }
8787
8788         nritems = btrfs_header_nritems(buf);
8789
8790         flags = 0;
8791         if (!init_extent_tree) {
8792                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8793                                        btrfs_header_level(buf), 1, NULL,
8794                                        &flags);
8795                 if (ret < 0) {
8796                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8797                         if (ret < 0) {
8798                                 fprintf(stderr, "Couldn't calc extent flags\n");
8799                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8800                         }
8801                 }
8802         } else {
8803                 flags = 0;
8804                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8805                 if (ret < 0) {
8806                         fprintf(stderr, "Couldn't calc extent flags\n");
8807                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8808                 }
8809         }
8810
8811         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8812                 if (ri != NULL &&
8813                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8814                     ri->objectid == btrfs_header_owner(buf)) {
8815                         /*
8816                          * Ok we got to this block from it's original owner and
8817                          * we have FULL_BACKREF set.  Relocation can leave
8818                          * converted blocks over so this is altogether possible,
8819                          * however it's not possible if the generation > the
8820                          * last snapshot, so check for this case.
8821                          */
8822                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8823                             btrfs_header_generation(buf) > ri->last_snapshot) {
8824                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8825                                 rec->bad_full_backref = 1;
8826                         }
8827                 }
8828         } else {
8829                 if (ri != NULL &&
8830                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8831                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8832                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8833                         rec->bad_full_backref = 1;
8834                 }
8835         }
8836
8837         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8838                 rec->flag_block_full_backref = 1;
8839                 parent = bytenr;
8840                 owner = 0;
8841         } else {
8842                 rec->flag_block_full_backref = 0;
8843                 parent = 0;
8844                 owner = btrfs_header_owner(buf);
8845         }
8846
8847         ret = check_block(root, extent_cache, buf, flags);
8848         if (ret)
8849                 goto out;
8850
8851         if (btrfs_is_leaf(buf)) {
8852                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8853                 for (i = 0; i < nritems; i++) {
8854                         struct btrfs_file_extent_item *fi;
8855                         btrfs_item_key_to_cpu(buf, &key, i);
8856                         /*
8857                          * Check key type against the leaf owner.
8858                          * Could filter quite a lot of early error if
8859                          * owner is correct
8860                          */
8861                         if (check_type_with_root(btrfs_header_owner(buf),
8862                                                  key.type)) {
8863                                 fprintf(stderr, "ignoring invalid key\n");
8864                                 continue;
8865                         }
8866                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8867                                 process_extent_item(root, extent_cache, buf,
8868                                                     i);
8869                                 continue;
8870                         }
8871                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8872                                 process_extent_item(root, extent_cache, buf,
8873                                                     i);
8874                                 continue;
8875                         }
8876                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8877                                 total_csum_bytes +=
8878                                         btrfs_item_size_nr(buf, i);
8879                                 continue;
8880                         }
8881                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8882                                 process_chunk_item(chunk_cache, &key, buf, i);
8883                                 continue;
8884                         }
8885                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8886                                 process_device_item(dev_cache, &key, buf, i);
8887                                 continue;
8888                         }
8889                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8890                                 process_block_group_item(block_group_cache,
8891                                         &key, buf, i);
8892                                 continue;
8893                         }
8894                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
8895                                 process_device_extent_item(dev_extent_cache,
8896                                         &key, buf, i);
8897                                 continue;
8898
8899                         }
8900                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
8901 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8902                                 process_extent_ref_v0(extent_cache, buf, i);
8903 #else
8904                                 BUG();
8905 #endif
8906                                 continue;
8907                         }
8908
8909                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
8910                                 ret = add_tree_backref(extent_cache,
8911                                                 key.objectid, 0, key.offset, 0);
8912                                 if (ret < 0)
8913                                         error(
8914                                 "add_tree_backref failed (leaf tree block): %s",
8915                                               strerror(-ret));
8916                                 continue;
8917                         }
8918                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
8919                                 ret = add_tree_backref(extent_cache,
8920                                                 key.objectid, key.offset, 0, 0);
8921                                 if (ret < 0)
8922                                         error(
8923                                 "add_tree_backref failed (leaf shared block): %s",
8924                                               strerror(-ret));
8925                                 continue;
8926                         }
8927                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
8928                                 struct btrfs_extent_data_ref *ref;
8929                                 ref = btrfs_item_ptr(buf, i,
8930                                                 struct btrfs_extent_data_ref);
8931                                 add_data_backref(extent_cache,
8932                                         key.objectid, 0,
8933                                         btrfs_extent_data_ref_root(buf, ref),
8934                                         btrfs_extent_data_ref_objectid(buf,
8935                                                                        ref),
8936                                         btrfs_extent_data_ref_offset(buf, ref),
8937                                         btrfs_extent_data_ref_count(buf, ref),
8938                                         0, root->fs_info->sectorsize);
8939                                 continue;
8940                         }
8941                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
8942                                 struct btrfs_shared_data_ref *ref;
8943                                 ref = btrfs_item_ptr(buf, i,
8944                                                 struct btrfs_shared_data_ref);
8945                                 add_data_backref(extent_cache,
8946                                         key.objectid, key.offset, 0, 0, 0,
8947                                         btrfs_shared_data_ref_count(buf, ref),
8948                                         0, root->fs_info->sectorsize);
8949                                 continue;
8950                         }
8951                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
8952                                 struct bad_item *bad;
8953
8954                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
8955                                         continue;
8956                                 if (!owner)
8957                                         continue;
8958                                 bad = malloc(sizeof(struct bad_item));
8959                                 if (!bad)
8960                                         continue;
8961                                 INIT_LIST_HEAD(&bad->list);
8962                                 memcpy(&bad->key, &key,
8963                                        sizeof(struct btrfs_key));
8964                                 bad->root_id = owner;
8965                                 list_add_tail(&bad->list, &delete_items);
8966                                 continue;
8967                         }
8968                         if (key.type != BTRFS_EXTENT_DATA_KEY)
8969                                 continue;
8970                         fi = btrfs_item_ptr(buf, i,
8971                                             struct btrfs_file_extent_item);
8972                         if (btrfs_file_extent_type(buf, fi) ==
8973                             BTRFS_FILE_EXTENT_INLINE)
8974                                 continue;
8975                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
8976                                 continue;
8977
8978                         data_bytes_allocated +=
8979                                 btrfs_file_extent_disk_num_bytes(buf, fi);
8980                         if (data_bytes_allocated < root->fs_info->sectorsize) {
8981                                 abort();
8982                         }
8983                         data_bytes_referenced +=
8984                                 btrfs_file_extent_num_bytes(buf, fi);
8985                         add_data_backref(extent_cache,
8986                                 btrfs_file_extent_disk_bytenr(buf, fi),
8987                                 parent, owner, key.objectid, key.offset -
8988                                 btrfs_file_extent_offset(buf, fi), 1, 1,
8989                                 btrfs_file_extent_disk_num_bytes(buf, fi));
8990                 }
8991         } else {
8992                 int level;
8993                 struct btrfs_key first_key;
8994
8995                 first_key.objectid = 0;
8996
8997                 if (nritems > 0)
8998                         btrfs_item_key_to_cpu(buf, &first_key, 0);
8999                 level = btrfs_header_level(buf);
9000                 for (i = 0; i < nritems; i++) {
9001                         struct extent_record tmpl;
9002
9003                         ptr = btrfs_node_blockptr(buf, i);
9004                         size = root->fs_info->nodesize;
9005                         btrfs_node_key_to_cpu(buf, &key, i);
9006                         if (ri != NULL) {
9007                                 if ((level == ri->drop_level)
9008                                     && is_dropped_key(&key, &ri->drop_key)) {
9009                                         continue;
9010                                 }
9011                         }
9012
9013                         memset(&tmpl, 0, sizeof(tmpl));
9014                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9015                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9016                         tmpl.start = ptr;
9017                         tmpl.nr = size;
9018                         tmpl.refs = 1;
9019                         tmpl.metadata = 1;
9020                         tmpl.max_size = size;
9021                         ret = add_extent_rec(extent_cache, &tmpl);
9022                         if (ret < 0)
9023                                 goto out;
9024
9025                         ret = add_tree_backref(extent_cache, ptr, parent,
9026                                         owner, 1);
9027                         if (ret < 0) {
9028                                 error(
9029                                 "add_tree_backref failed (non-leaf block): %s",
9030                                       strerror(-ret));
9031                                 continue;
9032                         }
9033
9034                         if (level > 1) {
9035                                 add_pending(nodes, seen, ptr, size);
9036                         } else {
9037                                 add_pending(pending, seen, ptr, size);
9038                         }
9039                 }
9040                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
9041                                       nritems) * sizeof(struct btrfs_key_ptr);
9042         }
9043         total_btree_bytes += buf->len;
9044         if (fs_root_objectid(btrfs_header_owner(buf)))
9045                 total_fs_tree_bytes += buf->len;
9046         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9047                 total_extent_tree_bytes += buf->len;
9048 out:
9049         free_extent_buffer(buf);
9050         return ret;
9051 }
9052
9053 static int add_root_to_pending(struct extent_buffer *buf,
9054                                struct cache_tree *extent_cache,
9055                                struct cache_tree *pending,
9056                                struct cache_tree *seen,
9057                                struct cache_tree *nodes,
9058                                u64 objectid)
9059 {
9060         struct extent_record tmpl;
9061         int ret;
9062
9063         if (btrfs_header_level(buf) > 0)
9064                 add_pending(nodes, seen, buf->start, buf->len);
9065         else
9066                 add_pending(pending, seen, buf->start, buf->len);
9067
9068         memset(&tmpl, 0, sizeof(tmpl));
9069         tmpl.start = buf->start;
9070         tmpl.nr = buf->len;
9071         tmpl.is_root = 1;
9072         tmpl.refs = 1;
9073         tmpl.metadata = 1;
9074         tmpl.max_size = buf->len;
9075         add_extent_rec(extent_cache, &tmpl);
9076
9077         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9078             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9079                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9080                                 0, 1);
9081         else
9082                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9083                                 1);
9084         return ret;
9085 }
9086
9087 /* as we fix the tree, we might be deleting blocks that
9088  * we're tracking for repair.  This hook makes sure we
9089  * remove any backrefs for blocks as we are fixing them.
9090  */
9091 static int free_extent_hook(struct btrfs_trans_handle *trans,
9092                             struct btrfs_root *root,
9093                             u64 bytenr, u64 num_bytes, u64 parent,
9094                             u64 root_objectid, u64 owner, u64 offset,
9095                             int refs_to_drop)
9096 {
9097         struct extent_record *rec;
9098         struct cache_extent *cache;
9099         int is_data;
9100         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9101
9102         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9103         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9104         if (!cache)
9105                 return 0;
9106
9107         rec = container_of(cache, struct extent_record, cache);
9108         if (is_data) {
9109                 struct data_backref *back;
9110                 back = find_data_backref(rec, parent, root_objectid, owner,
9111                                          offset, 1, bytenr, num_bytes);
9112                 if (!back)
9113                         goto out;
9114                 if (back->node.found_ref) {
9115                         back->found_ref -= refs_to_drop;
9116                         if (rec->refs)
9117                                 rec->refs -= refs_to_drop;
9118                 }
9119                 if (back->node.found_extent_tree) {
9120                         back->num_refs -= refs_to_drop;
9121                         if (rec->extent_item_refs)
9122                                 rec->extent_item_refs -= refs_to_drop;
9123                 }
9124                 if (back->found_ref == 0)
9125                         back->node.found_ref = 0;
9126                 if (back->num_refs == 0)
9127                         back->node.found_extent_tree = 0;
9128
9129                 if (!back->node.found_extent_tree && back->node.found_ref) {
9130                         rb_erase(&back->node.node, &rec->backref_tree);
9131                         free(back);
9132                 }
9133         } else {
9134                 struct tree_backref *back;
9135                 back = find_tree_backref(rec, parent, root_objectid);
9136                 if (!back)
9137                         goto out;
9138                 if (back->node.found_ref) {
9139                         if (rec->refs)
9140                                 rec->refs--;
9141                         back->node.found_ref = 0;
9142                 }
9143                 if (back->node.found_extent_tree) {
9144                         if (rec->extent_item_refs)
9145                                 rec->extent_item_refs--;
9146                         back->node.found_extent_tree = 0;
9147                 }
9148                 if (!back->node.found_extent_tree && back->node.found_ref) {
9149                         rb_erase(&back->node.node, &rec->backref_tree);
9150                         free(back);
9151                 }
9152         }
9153         maybe_free_extent_rec(extent_cache, rec);
9154 out:
9155         return 0;
9156 }
9157
9158 static int delete_extent_records(struct btrfs_trans_handle *trans,
9159                                  struct btrfs_root *root,
9160                                  struct btrfs_path *path,
9161                                  u64 bytenr)
9162 {
9163         struct btrfs_key key;
9164         struct btrfs_key found_key;
9165         struct extent_buffer *leaf;
9166         int ret;
9167         int slot;
9168
9169
9170         key.objectid = bytenr;
9171         key.type = (u8)-1;
9172         key.offset = (u64)-1;
9173
9174         while(1) {
9175                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9176                                         &key, path, 0, 1);
9177                 if (ret < 0)
9178                         break;
9179
9180                 if (ret > 0) {
9181                         ret = 0;
9182                         if (path->slots[0] == 0)
9183                                 break;
9184                         path->slots[0]--;
9185                 }
9186                 ret = 0;
9187
9188                 leaf = path->nodes[0];
9189                 slot = path->slots[0];
9190
9191                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9192                 if (found_key.objectid != bytenr)
9193                         break;
9194
9195                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9196                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9197                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9198                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9199                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9200                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9201                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9202                         btrfs_release_path(path);
9203                         if (found_key.type == 0) {
9204                                 if (found_key.offset == 0)
9205                                         break;
9206                                 key.offset = found_key.offset - 1;
9207                                 key.type = found_key.type;
9208                         }
9209                         key.type = found_key.type - 1;
9210                         key.offset = (u64)-1;
9211                         continue;
9212                 }
9213
9214                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9215                         found_key.objectid, found_key.type, found_key.offset);
9216
9217                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9218                 if (ret)
9219                         break;
9220                 btrfs_release_path(path);
9221
9222                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9223                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9224                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9225                                 found_key.offset : root->fs_info->nodesize;
9226
9227                         ret = btrfs_update_block_group(root, bytenr,
9228                                                        bytes, 0, 0);
9229                         if (ret)
9230                                 break;
9231                 }
9232         }
9233
9234         btrfs_release_path(path);
9235         return ret;
9236 }
9237
9238 /*
9239  * for a single backref, this will allocate a new extent
9240  * and add the backref to it.
9241  */
9242 static int record_extent(struct btrfs_trans_handle *trans,
9243                          struct btrfs_fs_info *info,
9244                          struct btrfs_path *path,
9245                          struct extent_record *rec,
9246                          struct extent_backref *back,
9247                          int allocated, u64 flags)
9248 {
9249         int ret = 0;
9250         struct btrfs_root *extent_root = info->extent_root;
9251         struct extent_buffer *leaf;
9252         struct btrfs_key ins_key;
9253         struct btrfs_extent_item *ei;
9254         struct data_backref *dback;
9255         struct btrfs_tree_block_info *bi;
9256
9257         if (!back->is_data)
9258                 rec->max_size = max_t(u64, rec->max_size,
9259                                     info->nodesize);
9260
9261         if (!allocated) {
9262                 u32 item_size = sizeof(*ei);
9263
9264                 if (!back->is_data)
9265                         item_size += sizeof(*bi);
9266
9267                 ins_key.objectid = rec->start;
9268                 ins_key.offset = rec->max_size;
9269                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9270
9271                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9272                                         &ins_key, item_size);
9273                 if (ret)
9274                         goto fail;
9275
9276                 leaf = path->nodes[0];
9277                 ei = btrfs_item_ptr(leaf, path->slots[0],
9278                                     struct btrfs_extent_item);
9279
9280                 btrfs_set_extent_refs(leaf, ei, 0);
9281                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9282
9283                 if (back->is_data) {
9284                         btrfs_set_extent_flags(leaf, ei,
9285                                                BTRFS_EXTENT_FLAG_DATA);
9286                 } else {
9287                         struct btrfs_disk_key copy_key;;
9288
9289                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9290                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9291                                              sizeof(*bi));
9292
9293                         btrfs_set_disk_key_objectid(&copy_key,
9294                                                     rec->info_objectid);
9295                         btrfs_set_disk_key_type(&copy_key, 0);
9296                         btrfs_set_disk_key_offset(&copy_key, 0);
9297
9298                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9299                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9300
9301                         btrfs_set_extent_flags(leaf, ei,
9302                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9303                 }
9304
9305                 btrfs_mark_buffer_dirty(leaf);
9306                 ret = btrfs_update_block_group(extent_root, rec->start,
9307                                                rec->max_size, 1, 0);
9308                 if (ret)
9309                         goto fail;
9310                 btrfs_release_path(path);
9311         }
9312
9313         if (back->is_data) {
9314                 u64 parent;
9315                 int i;
9316
9317                 dback = to_data_backref(back);
9318                 if (back->full_backref)
9319                         parent = dback->parent;
9320                 else
9321                         parent = 0;
9322
9323                 for (i = 0; i < dback->found_ref; i++) {
9324                         /* if parent != 0, we're doing a full backref
9325                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9326                          * just makes the backref allocator create a data
9327                          * backref
9328                          */
9329                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9330                                                    rec->start, rec->max_size,
9331                                                    parent,
9332                                                    dback->root,
9333                                                    parent ?
9334                                                    BTRFS_FIRST_FREE_OBJECTID :
9335                                                    dback->owner,
9336                                                    dback->offset);
9337                         if (ret)
9338                                 break;
9339                 }
9340                 fprintf(stderr, "adding new data backref"
9341                                 " on %llu %s %llu owner %llu"
9342                                 " offset %llu found %d\n",
9343                                 (unsigned long long)rec->start,
9344                                 back->full_backref ?
9345                                 "parent" : "root",
9346                                 back->full_backref ?
9347                                 (unsigned long long)parent :
9348                                 (unsigned long long)dback->root,
9349                                 (unsigned long long)dback->owner,
9350                                 (unsigned long long)dback->offset,
9351                                 dback->found_ref);
9352         } else {
9353                 u64 parent;
9354                 struct tree_backref *tback;
9355
9356                 tback = to_tree_backref(back);
9357                 if (back->full_backref)
9358                         parent = tback->parent;
9359                 else
9360                         parent = 0;
9361
9362                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9363                                            rec->start, rec->max_size,
9364                                            parent, tback->root, 0, 0);
9365                 fprintf(stderr, "adding new tree backref on "
9366                         "start %llu len %llu parent %llu root %llu\n",
9367                         rec->start, rec->max_size, parent, tback->root);
9368         }
9369 fail:
9370         btrfs_release_path(path);
9371         return ret;
9372 }
9373
9374 static struct extent_entry *find_entry(struct list_head *entries,
9375                                        u64 bytenr, u64 bytes)
9376 {
9377         struct extent_entry *entry = NULL;
9378
9379         list_for_each_entry(entry, entries, list) {
9380                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9381                         return entry;
9382         }
9383
9384         return NULL;
9385 }
9386
9387 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9388 {
9389         struct extent_entry *entry, *best = NULL, *prev = NULL;
9390
9391         list_for_each_entry(entry, entries, list) {
9392                 /*
9393                  * If there are as many broken entries as entries then we know
9394                  * not to trust this particular entry.
9395                  */
9396                 if (entry->broken == entry->count)
9397                         continue;
9398
9399                 /*
9400                  * Special case, when there are only two entries and 'best' is
9401                  * the first one
9402                  */
9403                 if (!prev) {
9404                         best = entry;
9405                         prev = entry;
9406                         continue;
9407                 }
9408
9409                 /*
9410                  * If our current entry == best then we can't be sure our best
9411                  * is really the best, so we need to keep searching.
9412                  */
9413                 if (best && best->count == entry->count) {
9414                         prev = entry;
9415                         best = NULL;
9416                         continue;
9417                 }
9418
9419                 /* Prev == entry, not good enough, have to keep searching */
9420                 if (!prev->broken && prev->count == entry->count)
9421                         continue;
9422
9423                 if (!best)
9424                         best = (prev->count > entry->count) ? prev : entry;
9425                 else if (best->count < entry->count)
9426                         best = entry;
9427                 prev = entry;
9428         }
9429
9430         return best;
9431 }
9432
9433 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9434                       struct data_backref *dback, struct extent_entry *entry)
9435 {
9436         struct btrfs_trans_handle *trans;
9437         struct btrfs_root *root;
9438         struct btrfs_file_extent_item *fi;
9439         struct extent_buffer *leaf;
9440         struct btrfs_key key;
9441         u64 bytenr, bytes;
9442         int ret, err;
9443
9444         key.objectid = dback->root;
9445         key.type = BTRFS_ROOT_ITEM_KEY;
9446         key.offset = (u64)-1;
9447         root = btrfs_read_fs_root(info, &key);
9448         if (IS_ERR(root)) {
9449                 fprintf(stderr, "Couldn't find root for our ref\n");
9450                 return -EINVAL;
9451         }
9452
9453         /*
9454          * The backref points to the original offset of the extent if it was
9455          * split, so we need to search down to the offset we have and then walk
9456          * forward until we find the backref we're looking for.
9457          */
9458         key.objectid = dback->owner;
9459         key.type = BTRFS_EXTENT_DATA_KEY;
9460         key.offset = dback->offset;
9461         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9462         if (ret < 0) {
9463                 fprintf(stderr, "Error looking up ref %d\n", ret);
9464                 return ret;
9465         }
9466
9467         while (1) {
9468                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9469                         ret = btrfs_next_leaf(root, path);
9470                         if (ret) {
9471                                 fprintf(stderr, "Couldn't find our ref, next\n");
9472                                 return -EINVAL;
9473                         }
9474                 }
9475                 leaf = path->nodes[0];
9476                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9477                 if (key.objectid != dback->owner ||
9478                     key.type != BTRFS_EXTENT_DATA_KEY) {
9479                         fprintf(stderr, "Couldn't find our ref, search\n");
9480                         return -EINVAL;
9481                 }
9482                 fi = btrfs_item_ptr(leaf, path->slots[0],
9483                                     struct btrfs_file_extent_item);
9484                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9485                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9486
9487                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9488                         break;
9489                 path->slots[0]++;
9490         }
9491
9492         btrfs_release_path(path);
9493
9494         trans = btrfs_start_transaction(root, 1);
9495         if (IS_ERR(trans))
9496                 return PTR_ERR(trans);
9497
9498         /*
9499          * Ok we have the key of the file extent we want to fix, now we can cow
9500          * down to the thing and fix it.
9501          */
9502         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9503         if (ret < 0) {
9504                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9505                         key.objectid, key.type, key.offset, ret);
9506                 goto out;
9507         }
9508         if (ret > 0) {
9509                 fprintf(stderr, "Well that's odd, we just found this key "
9510                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9511                         key.offset);
9512                 ret = -EINVAL;
9513                 goto out;
9514         }
9515         leaf = path->nodes[0];
9516         fi = btrfs_item_ptr(leaf, path->slots[0],
9517                             struct btrfs_file_extent_item);
9518
9519         if (btrfs_file_extent_compression(leaf, fi) &&
9520             dback->disk_bytenr != entry->bytenr) {
9521                 fprintf(stderr, "Ref doesn't match the record start and is "
9522                         "compressed, please take a btrfs-image of this file "
9523                         "system and send it to a btrfs developer so they can "
9524                         "complete this functionality for bytenr %Lu\n",
9525                         dback->disk_bytenr);
9526                 ret = -EINVAL;
9527                 goto out;
9528         }
9529
9530         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9531                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9532         } else if (dback->disk_bytenr > entry->bytenr) {
9533                 u64 off_diff, offset;
9534
9535                 off_diff = dback->disk_bytenr - entry->bytenr;
9536                 offset = btrfs_file_extent_offset(leaf, fi);
9537                 if (dback->disk_bytenr + offset +
9538                     btrfs_file_extent_num_bytes(leaf, fi) >
9539                     entry->bytenr + entry->bytes) {
9540                         fprintf(stderr, "Ref is past the entry end, please "
9541                                 "take a btrfs-image of this file system and "
9542                                 "send it to a btrfs developer, ref %Lu\n",
9543                                 dback->disk_bytenr);
9544                         ret = -EINVAL;
9545                         goto out;
9546                 }
9547                 offset += off_diff;
9548                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9549                 btrfs_set_file_extent_offset(leaf, fi, offset);
9550         } else if (dback->disk_bytenr < entry->bytenr) {
9551                 u64 offset;
9552
9553                 offset = btrfs_file_extent_offset(leaf, fi);
9554                 if (dback->disk_bytenr + offset < entry->bytenr) {
9555                         fprintf(stderr, "Ref is before the entry start, please"
9556                                 " take a btrfs-image of this file system and "
9557                                 "send it to a btrfs developer, ref %Lu\n",
9558                                 dback->disk_bytenr);
9559                         ret = -EINVAL;
9560                         goto out;
9561                 }
9562
9563                 offset += dback->disk_bytenr;
9564                 offset -= entry->bytenr;
9565                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9566                 btrfs_set_file_extent_offset(leaf, fi, offset);
9567         }
9568
9569         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9570
9571         /*
9572          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9573          * only do this if we aren't using compression, otherwise it's a
9574          * trickier case.
9575          */
9576         if (!btrfs_file_extent_compression(leaf, fi))
9577                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9578         else
9579                 printf("ram bytes may be wrong?\n");
9580         btrfs_mark_buffer_dirty(leaf);
9581 out:
9582         err = btrfs_commit_transaction(trans, root);
9583         btrfs_release_path(path);
9584         return ret ? ret : err;
9585 }
9586
9587 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9588                            struct extent_record *rec)
9589 {
9590         struct extent_backref *back, *tmp;
9591         struct data_backref *dback;
9592         struct extent_entry *entry, *best = NULL;
9593         LIST_HEAD(entries);
9594         int nr_entries = 0;
9595         int broken_entries = 0;
9596         int ret = 0;
9597         short mismatch = 0;
9598
9599         /*
9600          * Metadata is easy and the backrefs should always agree on bytenr and
9601          * size, if not we've got bigger issues.
9602          */
9603         if (rec->metadata)
9604                 return 0;
9605
9606         rbtree_postorder_for_each_entry_safe(back, tmp,
9607                                              &rec->backref_tree, node) {
9608                 if (back->full_backref || !back->is_data)
9609                         continue;
9610
9611                 dback = to_data_backref(back);
9612
9613                 /*
9614                  * We only pay attention to backrefs that we found a real
9615                  * backref for.
9616                  */
9617                 if (dback->found_ref == 0)
9618                         continue;
9619
9620                 /*
9621                  * For now we only catch when the bytes don't match, not the
9622                  * bytenr.  We can easily do this at the same time, but I want
9623                  * to have a fs image to test on before we just add repair
9624                  * functionality willy-nilly so we know we won't screw up the
9625                  * repair.
9626                  */
9627
9628                 entry = find_entry(&entries, dback->disk_bytenr,
9629                                    dback->bytes);
9630                 if (!entry) {
9631                         entry = malloc(sizeof(struct extent_entry));
9632                         if (!entry) {
9633                                 ret = -ENOMEM;
9634                                 goto out;
9635                         }
9636                         memset(entry, 0, sizeof(*entry));
9637                         entry->bytenr = dback->disk_bytenr;
9638                         entry->bytes = dback->bytes;
9639                         list_add_tail(&entry->list, &entries);
9640                         nr_entries++;
9641                 }
9642
9643                 /*
9644                  * If we only have on entry we may think the entries agree when
9645                  * in reality they don't so we have to do some extra checking.
9646                  */
9647                 if (dback->disk_bytenr != rec->start ||
9648                     dback->bytes != rec->nr || back->broken)
9649                         mismatch = 1;
9650
9651                 if (back->broken) {
9652                         entry->broken++;
9653                         broken_entries++;
9654                 }
9655
9656                 entry->count++;
9657         }
9658
9659         /* Yay all the backrefs agree, carry on good sir */
9660         if (nr_entries <= 1 && !mismatch)
9661                 goto out;
9662
9663         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9664                 "%Lu\n", rec->start);
9665
9666         /*
9667          * First we want to see if the backrefs can agree amongst themselves who
9668          * is right, so figure out which one of the entries has the highest
9669          * count.
9670          */
9671         best = find_most_right_entry(&entries);
9672
9673         /*
9674          * Ok so we may have an even split between what the backrefs think, so
9675          * this is where we use the extent ref to see what it thinks.
9676          */
9677         if (!best) {
9678                 entry = find_entry(&entries, rec->start, rec->nr);
9679                 if (!entry && (!broken_entries || !rec->found_rec)) {
9680                         fprintf(stderr, "Backrefs don't agree with each other "
9681                                 "and extent record doesn't agree with anybody,"
9682                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9683                                 rec->start, rec->nr);
9684                         ret = -EINVAL;
9685                         goto out;
9686                 } else if (!entry) {
9687                         /*
9688                          * Ok our backrefs were broken, we'll assume this is the
9689                          * correct value and add an entry for this range.
9690                          */
9691                         entry = malloc(sizeof(struct extent_entry));
9692                         if (!entry) {
9693                                 ret = -ENOMEM;
9694                                 goto out;
9695                         }
9696                         memset(entry, 0, sizeof(*entry));
9697                         entry->bytenr = rec->start;
9698                         entry->bytes = rec->nr;
9699                         list_add_tail(&entry->list, &entries);
9700                         nr_entries++;
9701                 }
9702                 entry->count++;
9703                 best = find_most_right_entry(&entries);
9704                 if (!best) {
9705                         fprintf(stderr, "Backrefs and extent record evenly "
9706                                 "split on who is right, this is going to "
9707                                 "require user input to fix bytenr %Lu bytes "
9708                                 "%Lu\n", rec->start, rec->nr);
9709                         ret = -EINVAL;
9710                         goto out;
9711                 }
9712         }
9713
9714         /*
9715          * I don't think this can happen currently as we'll abort() if we catch
9716          * this case higher up, but in case somebody removes that we still can't
9717          * deal with it properly here yet, so just bail out of that's the case.
9718          */
9719         if (best->bytenr != rec->start) {
9720                 fprintf(stderr, "Extent start and backref starts don't match, "
9721                         "please use btrfs-image on this file system and send "
9722                         "it to a btrfs developer so they can make fsck fix "
9723                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9724                         rec->start, rec->nr);
9725                 ret = -EINVAL;
9726                 goto out;
9727         }
9728
9729         /*
9730          * Ok great we all agreed on an extent record, let's go find the real
9731          * references and fix up the ones that don't match.
9732          */
9733         rbtree_postorder_for_each_entry_safe(back, tmp,
9734                                              &rec->backref_tree, node) {
9735                 if (back->full_backref || !back->is_data)
9736                         continue;
9737
9738                 dback = to_data_backref(back);
9739
9740                 /*
9741                  * Still ignoring backrefs that don't have a real ref attached
9742                  * to them.
9743                  */
9744                 if (dback->found_ref == 0)
9745                         continue;
9746
9747                 if (dback->bytes == best->bytes &&
9748                     dback->disk_bytenr == best->bytenr)
9749                         continue;
9750
9751                 ret = repair_ref(info, path, dback, best);
9752                 if (ret)
9753                         goto out;
9754         }
9755
9756         /*
9757          * Ok we messed with the actual refs, which means we need to drop our
9758          * entire cache and go back and rescan.  I know this is a huge pain and
9759          * adds a lot of extra work, but it's the only way to be safe.  Once all
9760          * the backrefs agree we may not need to do anything to the extent
9761          * record itself.
9762          */
9763         ret = -EAGAIN;
9764 out:
9765         while (!list_empty(&entries)) {
9766                 entry = list_entry(entries.next, struct extent_entry, list);
9767                 list_del_init(&entry->list);
9768                 free(entry);
9769         }
9770         return ret;
9771 }
9772
9773 static int process_duplicates(struct cache_tree *extent_cache,
9774                               struct extent_record *rec)
9775 {
9776         struct extent_record *good, *tmp;
9777         struct cache_extent *cache;
9778         int ret;
9779
9780         /*
9781          * If we found a extent record for this extent then return, or if we
9782          * have more than one duplicate we are likely going to need to delete
9783          * something.
9784          */
9785         if (rec->found_rec || rec->num_duplicates > 1)
9786                 return 0;
9787
9788         /* Shouldn't happen but just in case */
9789         BUG_ON(!rec->num_duplicates);
9790
9791         /*
9792          * So this happens if we end up with a backref that doesn't match the
9793          * actual extent entry.  So either the backref is bad or the extent
9794          * entry is bad.  Either way we want to have the extent_record actually
9795          * reflect what we found in the extent_tree, so we need to take the
9796          * duplicate out and use that as the extent_record since the only way we
9797          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9798          */
9799         remove_cache_extent(extent_cache, &rec->cache);
9800
9801         good = to_extent_record(rec->dups.next);
9802         list_del_init(&good->list);
9803         INIT_LIST_HEAD(&good->backrefs);
9804         INIT_LIST_HEAD(&good->dups);
9805         good->cache.start = good->start;
9806         good->cache.size = good->nr;
9807         good->content_checked = 0;
9808         good->owner_ref_checked = 0;
9809         good->num_duplicates = 0;
9810         good->refs = rec->refs;
9811         list_splice_init(&rec->backrefs, &good->backrefs);
9812         while (1) {
9813                 cache = lookup_cache_extent(extent_cache, good->start,
9814                                             good->nr);
9815                 if (!cache)
9816                         break;
9817                 tmp = container_of(cache, struct extent_record, cache);
9818
9819                 /*
9820                  * If we find another overlapping extent and it's found_rec is
9821                  * set then it's a duplicate and we need to try and delete
9822                  * something.
9823                  */
9824                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9825                         if (list_empty(&good->list))
9826                                 list_add_tail(&good->list,
9827                                               &duplicate_extents);
9828                         good->num_duplicates += tmp->num_duplicates + 1;
9829                         list_splice_init(&tmp->dups, &good->dups);
9830                         list_del_init(&tmp->list);
9831                         list_add_tail(&tmp->list, &good->dups);
9832                         remove_cache_extent(extent_cache, &tmp->cache);
9833                         continue;
9834                 }
9835
9836                 /*
9837                  * Ok we have another non extent item backed extent rec, so lets
9838                  * just add it to this extent and carry on like we did above.
9839                  */
9840                 good->refs += tmp->refs;
9841                 list_splice_init(&tmp->backrefs, &good->backrefs);
9842                 remove_cache_extent(extent_cache, &tmp->cache);
9843                 free(tmp);
9844         }
9845         ret = insert_cache_extent(extent_cache, &good->cache);
9846         BUG_ON(ret);
9847         free(rec);
9848         return good->num_duplicates ? 0 : 1;
9849 }
9850
9851 static int delete_duplicate_records(struct btrfs_root *root,
9852                                     struct extent_record *rec)
9853 {
9854         struct btrfs_trans_handle *trans;
9855         LIST_HEAD(delete_list);
9856         struct btrfs_path path;
9857         struct extent_record *tmp, *good, *n;
9858         int nr_del = 0;
9859         int ret = 0, err;
9860         struct btrfs_key key;
9861
9862         btrfs_init_path(&path);
9863
9864         good = rec;
9865         /* Find the record that covers all of the duplicates. */
9866         list_for_each_entry(tmp, &rec->dups, list) {
9867                 if (good->start < tmp->start)
9868                         continue;
9869                 if (good->nr > tmp->nr)
9870                         continue;
9871
9872                 if (tmp->start + tmp->nr < good->start + good->nr) {
9873                         fprintf(stderr, "Ok we have overlapping extents that "
9874                                 "aren't completely covered by each other, this "
9875                                 "is going to require more careful thought.  "
9876                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9877                                 tmp->start, tmp->nr, good->start, good->nr);
9878                         abort();
9879                 }
9880                 good = tmp;
9881         }
9882
9883         if (good != rec)
9884                 list_add_tail(&rec->list, &delete_list);
9885
9886         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
9887                 if (tmp == good)
9888                         continue;
9889                 list_move_tail(&tmp->list, &delete_list);
9890         }
9891
9892         root = root->fs_info->extent_root;
9893         trans = btrfs_start_transaction(root, 1);
9894         if (IS_ERR(trans)) {
9895                 ret = PTR_ERR(trans);
9896                 goto out;
9897         }
9898
9899         list_for_each_entry(tmp, &delete_list, list) {
9900                 if (tmp->found_rec == 0)
9901                         continue;
9902                 key.objectid = tmp->start;
9903                 key.type = BTRFS_EXTENT_ITEM_KEY;
9904                 key.offset = tmp->nr;
9905
9906                 /* Shouldn't happen but just in case */
9907                 if (tmp->metadata) {
9908                         fprintf(stderr, "Well this shouldn't happen, extent "
9909                                 "record overlaps but is metadata? "
9910                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
9911                         abort();
9912                 }
9913
9914                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
9915                 if (ret) {
9916                         if (ret > 0)
9917                                 ret = -EINVAL;
9918                         break;
9919                 }
9920                 ret = btrfs_del_item(trans, root, &path);
9921                 if (ret)
9922                         break;
9923                 btrfs_release_path(&path);
9924                 nr_del++;
9925         }
9926         err = btrfs_commit_transaction(trans, root);
9927         if (err && !ret)
9928                 ret = err;
9929 out:
9930         while (!list_empty(&delete_list)) {
9931                 tmp = to_extent_record(delete_list.next);
9932                 list_del_init(&tmp->list);
9933                 if (tmp == rec)
9934                         continue;
9935                 free(tmp);
9936         }
9937
9938         while (!list_empty(&rec->dups)) {
9939                 tmp = to_extent_record(rec->dups.next);
9940                 list_del_init(&tmp->list);
9941                 free(tmp);
9942         }
9943
9944         btrfs_release_path(&path);
9945
9946         if (!ret && !nr_del)
9947                 rec->num_duplicates = 0;
9948
9949         return ret ? ret : nr_del;
9950 }
9951
9952 static int find_possible_backrefs(struct btrfs_fs_info *info,
9953                                   struct btrfs_path *path,
9954                                   struct cache_tree *extent_cache,
9955                                   struct extent_record *rec)
9956 {
9957         struct btrfs_root *root;
9958         struct extent_backref *back, *tmp;
9959         struct data_backref *dback;
9960         struct cache_extent *cache;
9961         struct btrfs_file_extent_item *fi;
9962         struct btrfs_key key;
9963         u64 bytenr, bytes;
9964         int ret;
9965
9966         rbtree_postorder_for_each_entry_safe(back, tmp,
9967                                              &rec->backref_tree, node) {
9968                 /* Don't care about full backrefs (poor unloved backrefs) */
9969                 if (back->full_backref || !back->is_data)
9970                         continue;
9971
9972                 dback = to_data_backref(back);
9973
9974                 /* We found this one, we don't need to do a lookup */
9975                 if (dback->found_ref)
9976                         continue;
9977
9978                 key.objectid = dback->root;
9979                 key.type = BTRFS_ROOT_ITEM_KEY;
9980                 key.offset = (u64)-1;
9981
9982                 root = btrfs_read_fs_root(info, &key);
9983
9984                 /* No root, definitely a bad ref, skip */
9985                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
9986                         continue;
9987                 /* Other err, exit */
9988                 if (IS_ERR(root))
9989                         return PTR_ERR(root);
9990
9991                 key.objectid = dback->owner;
9992                 key.type = BTRFS_EXTENT_DATA_KEY;
9993                 key.offset = dback->offset;
9994                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9995                 if (ret) {
9996                         btrfs_release_path(path);
9997                         if (ret < 0)
9998                                 return ret;
9999                         /* Didn't find it, we can carry on */
10000                         ret = 0;
10001                         continue;
10002                 }
10003
10004                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10005                                     struct btrfs_file_extent_item);
10006                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10007                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10008                 btrfs_release_path(path);
10009                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10010                 if (cache) {
10011                         struct extent_record *tmp;
10012                         tmp = container_of(cache, struct extent_record, cache);
10013
10014                         /*
10015                          * If we found an extent record for the bytenr for this
10016                          * particular backref then we can't add it to our
10017                          * current extent record.  We only want to add backrefs
10018                          * that don't have a corresponding extent item in the
10019                          * extent tree since they likely belong to this record
10020                          * and we need to fix it if it doesn't match bytenrs.
10021                          */
10022                         if  (tmp->found_rec)
10023                                 continue;
10024                 }
10025
10026                 dback->found_ref += 1;
10027                 dback->disk_bytenr = bytenr;
10028                 dback->bytes = bytes;
10029
10030                 /*
10031                  * Set this so the verify backref code knows not to trust the
10032                  * values in this backref.
10033                  */
10034                 back->broken = 1;
10035         }
10036
10037         return 0;
10038 }
10039
10040 /*
10041  * Record orphan data ref into corresponding root.
10042  *
10043  * Return 0 if the extent item contains data ref and recorded.
10044  * Return 1 if the extent item contains no useful data ref
10045  *   On that case, it may contains only shared_dataref or metadata backref
10046  *   or the file extent exists(this should be handled by the extent bytenr
10047  *   recovery routine)
10048  * Return <0 if something goes wrong.
10049  */
10050 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10051                                       struct extent_record *rec)
10052 {
10053         struct btrfs_key key;
10054         struct btrfs_root *dest_root;
10055         struct extent_backref *back, *tmp;
10056         struct data_backref *dback;
10057         struct orphan_data_extent *orphan;
10058         struct btrfs_path path;
10059         int recorded_data_ref = 0;
10060         int ret = 0;
10061
10062         if (rec->metadata)
10063                 return 1;
10064         btrfs_init_path(&path);
10065         rbtree_postorder_for_each_entry_safe(back, tmp,
10066                                              &rec->backref_tree, node) {
10067                 if (back->full_backref || !back->is_data ||
10068                     !back->found_extent_tree)
10069                         continue;
10070                 dback = to_data_backref(back);
10071                 if (dback->found_ref)
10072                         continue;
10073                 key.objectid = dback->root;
10074                 key.type = BTRFS_ROOT_ITEM_KEY;
10075                 key.offset = (u64)-1;
10076
10077                 dest_root = btrfs_read_fs_root(fs_info, &key);
10078
10079                 /* For non-exist root we just skip it */
10080                 if (IS_ERR(dest_root) || !dest_root)
10081                         continue;
10082
10083                 key.objectid = dback->owner;
10084                 key.type = BTRFS_EXTENT_DATA_KEY;
10085                 key.offset = dback->offset;
10086
10087                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10088                 btrfs_release_path(&path);
10089                 /*
10090                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10091                  * we need to record it for inode/file extent rebuild.
10092                  * For ret > 0, we record it only for file extent rebuild.
10093                  * For ret == 0, the file extent exists but only bytenr
10094                  * mismatch, let the original bytenr fix routine to handle,
10095                  * don't record it.
10096                  */
10097                 if (ret == 0)
10098                         continue;
10099                 ret = 0;
10100                 orphan = malloc(sizeof(*orphan));
10101                 if (!orphan) {
10102                         ret = -ENOMEM;
10103                         goto out;
10104                 }
10105                 INIT_LIST_HEAD(&orphan->list);
10106                 orphan->root = dback->root;
10107                 orphan->objectid = dback->owner;
10108                 orphan->offset = dback->offset;
10109                 orphan->disk_bytenr = rec->cache.start;
10110                 orphan->disk_len = rec->cache.size;
10111                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10112                 recorded_data_ref = 1;
10113         }
10114 out:
10115         btrfs_release_path(&path);
10116         if (!ret)
10117                 return !recorded_data_ref;
10118         else
10119                 return ret;
10120 }
10121
10122 /*
10123  * when an incorrect extent item is found, this will delete
10124  * all of the existing entries for it and recreate them
10125  * based on what the tree scan found.
10126  */
10127 static int fixup_extent_refs(struct btrfs_fs_info *info,
10128                              struct cache_tree *extent_cache,
10129                              struct extent_record *rec)
10130 {
10131         struct btrfs_trans_handle *trans = NULL;
10132         int ret;
10133         struct btrfs_path path;
10134         struct cache_extent *cache;
10135         struct extent_backref *back, *tmp;
10136         int allocated = 0;
10137         u64 flags = 0;
10138
10139         if (rec->flag_block_full_backref)
10140                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10141
10142         btrfs_init_path(&path);
10143         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10144                 /*
10145                  * Sometimes the backrefs themselves are so broken they don't
10146                  * get attached to any meaningful rec, so first go back and
10147                  * check any of our backrefs that we couldn't find and throw
10148                  * them into the list if we find the backref so that
10149                  * verify_backrefs can figure out what to do.
10150                  */
10151                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10152                 if (ret < 0)
10153                         goto out;
10154         }
10155
10156         /* step one, make sure all of the backrefs agree */
10157         ret = verify_backrefs(info, &path, rec);
10158         if (ret < 0)
10159                 goto out;
10160
10161         trans = btrfs_start_transaction(info->extent_root, 1);
10162         if (IS_ERR(trans)) {
10163                 ret = PTR_ERR(trans);
10164                 goto out;
10165         }
10166
10167         /* step two, delete all the existing records */
10168         ret = delete_extent_records(trans, info->extent_root, &path,
10169                                     rec->start);
10170
10171         if (ret < 0)
10172                 goto out;
10173
10174         /* was this block corrupt?  If so, don't add references to it */
10175         cache = lookup_cache_extent(info->corrupt_blocks,
10176                                     rec->start, rec->max_size);
10177         if (cache) {
10178                 ret = 0;
10179                 goto out;
10180         }
10181
10182         /* step three, recreate all the refs we did find */
10183         rbtree_postorder_for_each_entry_safe(back, tmp,
10184                                              &rec->backref_tree, node) {
10185                 /*
10186                  * if we didn't find any references, don't create a
10187                  * new extent record
10188                  */
10189                 if (!back->found_ref)
10190                         continue;
10191
10192                 rec->bad_full_backref = 0;
10193                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10194                 allocated = 1;
10195
10196                 if (ret)
10197                         goto out;
10198         }
10199 out:
10200         if (trans) {
10201                 int err = btrfs_commit_transaction(trans, info->extent_root);
10202                 if (!ret)
10203                         ret = err;
10204         }
10205
10206         if (!ret)
10207                 fprintf(stderr, "Repaired extent references for %llu\n",
10208                                 (unsigned long long)rec->start);
10209
10210         btrfs_release_path(&path);
10211         return ret;
10212 }
10213
10214 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10215                               struct extent_record *rec)
10216 {
10217         struct btrfs_trans_handle *trans;
10218         struct btrfs_root *root = fs_info->extent_root;
10219         struct btrfs_path path;
10220         struct btrfs_extent_item *ei;
10221         struct btrfs_key key;
10222         u64 flags;
10223         int ret = 0;
10224
10225         key.objectid = rec->start;
10226         if (rec->metadata) {
10227                 key.type = BTRFS_METADATA_ITEM_KEY;
10228                 key.offset = rec->info_level;
10229         } else {
10230                 key.type = BTRFS_EXTENT_ITEM_KEY;
10231                 key.offset = rec->max_size;
10232         }
10233
10234         trans = btrfs_start_transaction(root, 0);
10235         if (IS_ERR(trans))
10236                 return PTR_ERR(trans);
10237
10238         btrfs_init_path(&path);
10239         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10240         if (ret < 0) {
10241                 btrfs_release_path(&path);
10242                 btrfs_commit_transaction(trans, root);
10243                 return ret;
10244         } else if (ret) {
10245                 fprintf(stderr, "Didn't find extent for %llu\n",
10246                         (unsigned long long)rec->start);
10247                 btrfs_release_path(&path);
10248                 btrfs_commit_transaction(trans, root);
10249                 return -ENOENT;
10250         }
10251
10252         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10253                             struct btrfs_extent_item);
10254         flags = btrfs_extent_flags(path.nodes[0], ei);
10255         if (rec->flag_block_full_backref) {
10256                 fprintf(stderr, "setting full backref on %llu\n",
10257                         (unsigned long long)key.objectid);
10258                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10259         } else {
10260                 fprintf(stderr, "clearing full backref on %llu\n",
10261                         (unsigned long long)key.objectid);
10262                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10263         }
10264         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10265         btrfs_mark_buffer_dirty(path.nodes[0]);
10266         btrfs_release_path(&path);
10267         ret = btrfs_commit_transaction(trans, root);
10268         if (!ret)
10269                 fprintf(stderr, "Repaired extent flags for %llu\n",
10270                                 (unsigned long long)rec->start);
10271
10272         return ret;
10273 }
10274
10275 /* right now we only prune from the extent allocation tree */
10276 static int prune_one_block(struct btrfs_trans_handle *trans,
10277                            struct btrfs_fs_info *info,
10278                            struct btrfs_corrupt_block *corrupt)
10279 {
10280         int ret;
10281         struct btrfs_path path;
10282         struct extent_buffer *eb;
10283         u64 found;
10284         int slot;
10285         int nritems;
10286         int level = corrupt->level + 1;
10287
10288         btrfs_init_path(&path);
10289 again:
10290         /* we want to stop at the parent to our busted block */
10291         path.lowest_level = level;
10292
10293         ret = btrfs_search_slot(trans, info->extent_root,
10294                                 &corrupt->key, &path, -1, 1);
10295
10296         if (ret < 0)
10297                 goto out;
10298
10299         eb = path.nodes[level];
10300         if (!eb) {
10301                 ret = -ENOENT;
10302                 goto out;
10303         }
10304
10305         /*
10306          * hopefully the search gave us the block we want to prune,
10307          * lets try that first
10308          */
10309         slot = path.slots[level];
10310         found =  btrfs_node_blockptr(eb, slot);
10311         if (found == corrupt->cache.start)
10312                 goto del_ptr;
10313
10314         nritems = btrfs_header_nritems(eb);
10315
10316         /* the search failed, lets scan this node and hope we find it */
10317         for (slot = 0; slot < nritems; slot++) {
10318                 found =  btrfs_node_blockptr(eb, slot);
10319                 if (found == corrupt->cache.start)
10320                         goto del_ptr;
10321         }
10322         /*
10323          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10324          * to this block
10325          */
10326         if (eb == info->extent_root->node) {
10327                 ret = -ENOENT;
10328                 goto out;
10329         } else {
10330                 level++;
10331                 btrfs_release_path(&path);
10332                 goto again;
10333         }
10334
10335 del_ptr:
10336         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10337         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10338
10339 out:
10340         btrfs_release_path(&path);
10341         return ret;
10342 }
10343
10344 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10345 {
10346         struct btrfs_trans_handle *trans = NULL;
10347         struct cache_extent *cache;
10348         struct btrfs_corrupt_block *corrupt;
10349
10350         while (1) {
10351                 cache = search_cache_extent(info->corrupt_blocks, 0);
10352                 if (!cache)
10353                         break;
10354                 if (!trans) {
10355                         trans = btrfs_start_transaction(info->extent_root, 1);
10356                         if (IS_ERR(trans))
10357                                 return PTR_ERR(trans);
10358                 }
10359                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10360                 prune_one_block(trans, info, corrupt);
10361                 remove_cache_extent(info->corrupt_blocks, cache);
10362         }
10363         if (trans)
10364                 return btrfs_commit_transaction(trans, info->extent_root);
10365         return 0;
10366 }
10367
10368 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10369 {
10370         struct btrfs_block_group_cache *cache;
10371         u64 start, end;
10372         int ret;
10373
10374         while (1) {
10375                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10376                                             &start, &end, EXTENT_DIRTY);
10377                 if (ret)
10378                         break;
10379                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10380         }
10381
10382         start = 0;
10383         while (1) {
10384                 cache = btrfs_lookup_first_block_group(fs_info, start);
10385                 if (!cache)
10386                         break;
10387                 if (cache->cached)
10388                         cache->cached = 0;
10389                 start = cache->key.objectid + cache->key.offset;
10390         }
10391 }
10392
10393 static int check_extent_refs(struct btrfs_root *root,
10394                              struct cache_tree *extent_cache)
10395 {
10396         struct extent_record *rec;
10397         struct cache_extent *cache;
10398         int ret = 0;
10399         int had_dups = 0;
10400         int err = 0;
10401
10402         if (repair) {
10403                 /*
10404                  * if we're doing a repair, we have to make sure
10405                  * we don't allocate from the problem extents.
10406                  * In the worst case, this will be all the
10407                  * extents in the FS
10408                  */
10409                 cache = search_cache_extent(extent_cache, 0);
10410                 while(cache) {
10411                         rec = container_of(cache, struct extent_record, cache);
10412                         set_extent_dirty(root->fs_info->excluded_extents,
10413                                          rec->start,
10414                                          rec->start + rec->max_size - 1);
10415                         cache = next_cache_extent(cache);
10416                 }
10417
10418                 /* pin down all the corrupted blocks too */
10419                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10420                 while(cache) {
10421                         set_extent_dirty(root->fs_info->excluded_extents,
10422                                          cache->start,
10423                                          cache->start + cache->size - 1);
10424                         cache = next_cache_extent(cache);
10425                 }
10426                 prune_corrupt_blocks(root->fs_info);
10427                 reset_cached_block_groups(root->fs_info);
10428         }
10429
10430         reset_cached_block_groups(root->fs_info);
10431
10432         /*
10433          * We need to delete any duplicate entries we find first otherwise we
10434          * could mess up the extent tree when we have backrefs that actually
10435          * belong to a different extent item and not the weird duplicate one.
10436          */
10437         while (repair && !list_empty(&duplicate_extents)) {
10438                 rec = to_extent_record(duplicate_extents.next);
10439                 list_del_init(&rec->list);
10440
10441                 /* Sometimes we can find a backref before we find an actual
10442                  * extent, so we need to process it a little bit to see if there
10443                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10444                  * if this is a backref screwup.  If we need to delete stuff
10445                  * process_duplicates() will return 0, otherwise it will return
10446                  * 1 and we
10447                  */
10448                 if (process_duplicates(extent_cache, rec))
10449                         continue;
10450                 ret = delete_duplicate_records(root, rec);
10451                 if (ret < 0)
10452                         return ret;
10453                 /*
10454                  * delete_duplicate_records will return the number of entries
10455                  * deleted, so if it's greater than 0 then we know we actually
10456                  * did something and we need to remove.
10457                  */
10458                 if (ret)
10459                         had_dups = 1;
10460         }
10461
10462         if (had_dups)
10463                 return -EAGAIN;
10464
10465         while(1) {
10466                 int cur_err = 0;
10467                 int fix = 0;
10468
10469                 cache = search_cache_extent(extent_cache, 0);
10470                 if (!cache)
10471                         break;
10472                 rec = container_of(cache, struct extent_record, cache);
10473                 if (rec->num_duplicates) {
10474                         fprintf(stderr, "extent item %llu has multiple extent "
10475                                 "items\n", (unsigned long long)rec->start);
10476                         cur_err = 1;
10477                 }
10478
10479                 if (rec->refs != rec->extent_item_refs) {
10480                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10481                                 (unsigned long long)rec->start,
10482                                 (unsigned long long)rec->nr);
10483                         fprintf(stderr, "extent item %llu, found %llu\n",
10484                                 (unsigned long long)rec->extent_item_refs,
10485                                 (unsigned long long)rec->refs);
10486                         ret = record_orphan_data_extents(root->fs_info, rec);
10487                         if (ret < 0)
10488                                 goto repair_abort;
10489                         fix = ret;
10490                         cur_err = 1;
10491                 }
10492                 if (all_backpointers_checked(rec, 1)) {
10493                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10494                                 (unsigned long long)rec->start,
10495                                 (unsigned long long)rec->nr);
10496                         fix = 1;
10497                         cur_err = 1;
10498                 }
10499                 if (!rec->owner_ref_checked) {
10500                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10501                                 (unsigned long long)rec->start,
10502                                 (unsigned long long)rec->nr);
10503                         fix = 1;
10504                         cur_err = 1;
10505                 }
10506
10507                 if (repair && fix) {
10508                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10509                         if (ret)
10510                                 goto repair_abort;
10511                 }
10512
10513
10514                 if (rec->bad_full_backref) {
10515                         fprintf(stderr, "bad full backref, on [%llu]\n",
10516                                 (unsigned long long)rec->start);
10517                         if (repair) {
10518                                 ret = fixup_extent_flags(root->fs_info, rec);
10519                                 if (ret)
10520                                         goto repair_abort;
10521                                 fix = 1;
10522                         }
10523                         cur_err = 1;
10524                 }
10525                 /*
10526                  * Although it's not a extent ref's problem, we reuse this
10527                  * routine for error reporting.
10528                  * No repair function yet.
10529                  */
10530                 if (rec->crossing_stripes) {
10531                         fprintf(stderr,
10532                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10533                                 rec->start, rec->start + rec->max_size);
10534                         cur_err = 1;
10535                 }
10536
10537                 if (rec->wrong_chunk_type) {
10538                         fprintf(stderr,
10539                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10540                                 rec->start, rec->start + rec->max_size);
10541                         cur_err = 1;
10542                 }
10543
10544                 err = cur_err;
10545                 remove_cache_extent(extent_cache, cache);
10546                 free_all_extent_backrefs(rec);
10547                 if (!init_extent_tree && repair && (!cur_err || fix))
10548                         clear_extent_dirty(root->fs_info->excluded_extents,
10549                                            rec->start,
10550                                            rec->start + rec->max_size - 1);
10551                 free(rec);
10552         }
10553 repair_abort:
10554         if (repair) {
10555                 if (ret && ret != -EAGAIN) {
10556                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10557                         exit(1);
10558                 } else if (!ret) {
10559                         struct btrfs_trans_handle *trans;
10560
10561                         root = root->fs_info->extent_root;
10562                         trans = btrfs_start_transaction(root, 1);
10563                         if (IS_ERR(trans)) {
10564                                 ret = PTR_ERR(trans);
10565                                 goto repair_abort;
10566                         }
10567
10568                         ret = btrfs_fix_block_accounting(trans, root);
10569                         if (ret)
10570                                 goto repair_abort;
10571                         ret = btrfs_commit_transaction(trans, root);
10572                         if (ret)
10573                                 goto repair_abort;
10574                 }
10575                 return ret;
10576         }
10577
10578         if (err)
10579                 err = -EIO;
10580         return err;
10581 }
10582
10583 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10584 {
10585         u64 stripe_size;
10586
10587         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10588                 stripe_size = length;
10589                 stripe_size /= num_stripes;
10590         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10591                 stripe_size = length * 2;
10592                 stripe_size /= num_stripes;
10593         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10594                 stripe_size = length;
10595                 stripe_size /= (num_stripes - 1);
10596         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10597                 stripe_size = length;
10598                 stripe_size /= (num_stripes - 2);
10599         } else {
10600                 stripe_size = length;
10601         }
10602         return stripe_size;
10603 }
10604
/*
 * Check a chunk against its block group item and its device extents.
 *
 * @chunk_rec:         chunk to verify
 * @block_group_cache: block group records, looked up by the chunk's
 *                     offset and length
 * @dev_extent_cache:  device extent records, looked up by the devid, offset
 *                     and stripe length of each stripe
 * @silent:            when non-zero, suppress the diagnostics on stderr
 *
 * On a match the block group record is unlinked from the list it is on and
 * stored in chunk_rec->bg_rec, and every matching device extent is moved onto
 * chunk_rec->dextents.
 *
 * Return 0 if all refs seem valid.
 * Return 1 if only part of the refs seem valid and a later check is needed to
 * rebuild them, e.g. the block group is missing and the extent tree has to be
 * searched to rebuild it.
 * Return -1 if essential refs are missing or mismatched and cannot be rebuilt.
 */
static int check_chunk_refs(struct chunk_record *chunk_rec,
                            struct block_group_tree *block_group_cache,
                            struct device_extent_tree *dev_extent_cache,
                            int silent)
{
        struct cache_extent *block_group_item;
        struct block_group_record *block_group_rec;
        struct cache_extent *dev_extent_item;
        struct device_extent_record *dev_extent_rec;
        u64 devid;
        u64 offset;
        u64 length;
        /*
         * Never set to non-zero inside this function, so the metadump_v2
         * special cases below are currently inactive.
         */
        int metadump_v2 = 0;
        int i;
        int ret = 0;

        /* Step 1: the chunk must be backed by a matching block group item */
        block_group_item = lookup_cache_extent(&block_group_cache->tree,
                                               chunk_rec->offset,
                                               chunk_rec->length);
        if (block_group_item) {
                block_group_rec = container_of(block_group_item,
                                               struct block_group_record,
                                               cache);
                /*
                 * The chunk length is compared with the block group key
                 * offset and the chunk offset with the block group key
                 * objectid; the flags must agree as well.
                 */
                if (chunk_rec->length != block_group_rec->offset ||
                    chunk_rec->offset != block_group_rec->objectid ||
                    (!metadump_v2 &&
                     chunk_rec->type_flags != block_group_rec->flags)) {
                        if (!silent)
                                fprintf(stderr,
                                        "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
                                        chunk_rec->objectid,
                                        chunk_rec->type,
                                        chunk_rec->offset,
                                        chunk_rec->length,
                                        chunk_rec->offset,
                                        chunk_rec->type_flags,
                                        block_group_rec->objectid,
                                        block_group_rec->type,
                                        block_group_rec->offset,
                                        block_group_rec->offset,
                                        block_group_rec->objectid,
                                        block_group_rec->flags);
                        ret = -1;
                } else {
                        /* Claimed: take it off its list and link it to the chunk */
                        list_del_init(&block_group_rec->list);
                        chunk_rec->bg_rec = block_group_rec;
                }
        } else {
                if (!silent)
                        fprintf(stderr,
                                "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
                                chunk_rec->objectid,
                                chunk_rec->type,
                                chunk_rec->offset,
                                chunk_rec->length,
                                chunk_rec->offset,
                                chunk_rec->type_flags);
                /* Missing block group: reported as "partially valid" */
                ret = 1;
        }

        if (metadump_v2)
                return ret;

        /* Step 2: every stripe must be backed by a matching device extent */
        length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
                                    chunk_rec->num_stripes);
        for (i = 0; i < chunk_rec->num_stripes; ++i) {
                devid = chunk_rec->stripes[i].devid;
                offset = chunk_rec->stripes[i].offset;
                dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
                                                       devid, offset, length);
                if (dev_extent_item) {
                        dev_extent_rec = container_of(dev_extent_item,
                                                struct device_extent_record,
                                                cache);
                        if (dev_extent_rec->objectid != devid ||
                            dev_extent_rec->offset != offset ||
                            dev_extent_rec->chunk_offset != chunk_rec->offset ||
                            dev_extent_rec->length != length) {
                                if (!silent)
                                        fprintf(stderr,
                                                "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
                                                chunk_rec->objectid,
                                                chunk_rec->type,
                                                chunk_rec->offset,
                                                chunk_rec->stripes[i].devid,
                                                chunk_rec->stripes[i].offset,
                                                dev_extent_rec->objectid,
                                                dev_extent_rec->offset,
                                                dev_extent_rec->length);
                                ret = -1;
                        } else {
                                /* Claimed: attach the dev extent to this chunk */
                                list_move(&dev_extent_rec->chunk_list,
                                          &chunk_rec->dextents);
                        }
                } else {
                        if (!silent)
                                fprintf(stderr,
                                        "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
                                        chunk_rec->objectid,
                                        chunk_rec->type,
                                        chunk_rec->offset,
                                        chunk_rec->stripes[i].devid,
                                        chunk_rec->stripes[i].offset);
                        /* A stripe problem overrides an earlier ret == 1 */
                        ret = -1;
                }
        }
        return ret;
}
10720
10721 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10722 int check_chunks(struct cache_tree *chunk_cache,
10723                  struct block_group_tree *block_group_cache,
10724                  struct device_extent_tree *dev_extent_cache,
10725                  struct list_head *good, struct list_head *bad,
10726                  struct list_head *rebuild, int silent)
10727 {
10728         struct cache_extent *chunk_item;
10729         struct chunk_record *chunk_rec;
10730         struct block_group_record *bg_rec;
10731         struct device_extent_record *dext_rec;
10732         int err;
10733         int ret = 0;
10734
10735         chunk_item = first_cache_extent(chunk_cache);
10736         while (chunk_item) {
10737                 chunk_rec = container_of(chunk_item, struct chunk_record,
10738                                          cache);
10739                 err = check_chunk_refs(chunk_rec, block_group_cache,
10740                                        dev_extent_cache, silent);
10741                 if (err < 0)
10742                         ret = err;
10743                 if (err == 0 && good)
10744                         list_add_tail(&chunk_rec->list, good);
10745                 if (err > 0 && rebuild)
10746                         list_add_tail(&chunk_rec->list, rebuild);
10747                 if (err < 0 && bad)
10748                         list_add_tail(&chunk_rec->list, bad);
10749                 chunk_item = next_cache_extent(chunk_item);
10750         }
10751
10752         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10753                 if (!silent)
10754                         fprintf(stderr,
10755                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10756                                 bg_rec->objectid,
10757                                 bg_rec->offset,
10758                                 bg_rec->flags);
10759                 if (!ret)
10760                         ret = 1;
10761         }
10762
10763         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10764                             chunk_list) {
10765                 if (!silent)
10766                         fprintf(stderr,
10767                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10768                                 dext_rec->objectid,
10769                                 dext_rec->offset,
10770                                 dext_rec->length);
10771                 if (!ret)
10772                         ret = 1;
10773         }
10774         return ret;
10775 }
10776
10777
10778 static int check_device_used(struct device_record *dev_rec,
10779                              struct device_extent_tree *dext_cache)
10780 {
10781         struct cache_extent *cache;
10782         struct device_extent_record *dev_extent_rec;
10783         u64 total_byte = 0;
10784
10785         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10786         while (cache) {
10787                 dev_extent_rec = container_of(cache,
10788                                               struct device_extent_record,
10789                                               cache);
10790                 if (dev_extent_rec->objectid != dev_rec->devid)
10791                         break;
10792
10793                 list_del_init(&dev_extent_rec->device_list);
10794                 total_byte += dev_extent_rec->length;
10795                 cache = next_cache_extent(cache);
10796         }
10797
10798         if (total_byte != dev_rec->byte_used) {
10799                 fprintf(stderr,
10800                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10801                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10802                         dev_rec->type, dev_rec->offset);
10803                 return -1;
10804         } else {
10805                 return 0;
10806         }
10807 }
10808
10809 /*
10810  * Extra (optional) check for dev_item size to report possbile problem on a new
10811  * kernel.
10812  */
10813 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
10814 {
10815         if (!IS_ALIGNED(total_bytes, sectorsize)) {
10816                 warning(
10817 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
10818                         devid, total_bytes, sectorsize);
10819                 warning(
10820 "this is OK for older kernel, but may cause kernel warning for newer kernels");
10821                 warning("this can be fixed by 'btrfs rescue fix-device-size'");
10822         }
10823 }
10824
10825 /*
10826  * Unlike device size alignment check above, some super total_bytes check
10827  * failure can lead to mount failure for newer kernel.
10828  *
10829  * So this function will return the error for a fatal super total_bytes problem.
10830  */
10831 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10832 {
10833         struct btrfs_device *dev;
10834         struct list_head *dev_list = &fs_info->fs_devices->devices;
10835         u64 total_bytes = 0;
10836         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10837
10838         list_for_each_entry(dev, dev_list, dev_list)
10839                 total_bytes += dev->total_bytes;
10840
10841         /* Important check, which can cause unmountable fs */
10842         if (super_bytes < total_bytes) {
10843                 error("super total bytes %llu smaller than real device(s) size %llu",
10844                         super_bytes, total_bytes);
10845                 error("mounting this fs may fail for newer kernels");
10846                 error("this can be fixed by 'btrfs rescue fix-device-size'");
10847                 return false;
10848         }
10849
10850         /*
10851          * Optional check, just to make everything aligned and match with each
10852          * other.
10853          *
10854          * For a btrfs-image restored fs, we don't need to check it anyway.
10855          */
10856         if (btrfs_super_flags(fs_info->super_copy) &
10857             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10858                 return true;
10859         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10860             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10861             super_bytes != total_bytes) {
10862                 warning("minor unaligned/mismatch device size detected");
10863                 warning(
10864                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10865         }
10866         return true;
10867 }
10868
10869 /* check btrfs_dev_item -> btrfs_dev_extent */
10870 static int check_devices(struct rb_root *dev_cache,
10871                          struct device_extent_tree *dev_extent_cache)
10872 {
10873         struct rb_node *dev_node;
10874         struct device_record *dev_rec;
10875         struct device_extent_record *dext_rec;
10876         int err;
10877         int ret = 0;
10878
10879         dev_node = rb_first(dev_cache);
10880         while (dev_node) {
10881                 dev_rec = container_of(dev_node, struct device_record, node);
10882                 err = check_device_used(dev_rec, dev_extent_cache);
10883                 if (err)
10884                         ret = err;
10885
10886                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
10887                                          global_info->sectorsize);
10888                 dev_node = rb_next(dev_node);
10889         }
10890         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
10891                             device_list) {
10892                 fprintf(stderr,
10893                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
10894                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
10895                 if (!ret)
10896                         ret = 1;
10897         }
10898         return ret;
10899 }
10900
10901 static int add_root_item_to_list(struct list_head *head,
10902                                   u64 objectid, u64 bytenr, u64 last_snapshot,
10903                                   u8 level, u8 drop_level,
10904                                   struct btrfs_key *drop_key)
10905 {
10906
10907         struct root_item_record *ri_rec;
10908         ri_rec = malloc(sizeof(*ri_rec));
10909         if (!ri_rec)
10910                 return -ENOMEM;
10911         ri_rec->bytenr = bytenr;
10912         ri_rec->objectid = objectid;
10913         ri_rec->level = level;
10914         ri_rec->drop_level = drop_level;
10915         ri_rec->last_snapshot = last_snapshot;
10916         if (drop_key)
10917                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
10918         list_add_tail(&ri_rec->list, head);
10919
10920         return 0;
10921 }
10922
10923 static void free_root_item_list(struct list_head *list)
10924 {
10925         struct root_item_record *ri_rec;
10926
10927         while (!list_empty(list)) {
10928                 ri_rec = list_first_entry(list, struct root_item_record,
10929                                           list);
10930                 list_del_init(&ri_rec->list);
10931                 free(ri_rec);
10932         }
10933 }
10934
10935 static int deal_root_from_list(struct list_head *list,
10936                                struct btrfs_root *root,
10937                                struct block_info *bits,
10938                                int bits_nr,
10939                                struct cache_tree *pending,
10940                                struct cache_tree *seen,
10941                                struct cache_tree *reada,
10942                                struct cache_tree *nodes,
10943                                struct cache_tree *extent_cache,
10944                                struct cache_tree *chunk_cache,
10945                                struct rb_root *dev_cache,
10946                                struct block_group_tree *block_group_cache,
10947                                struct device_extent_tree *dev_extent_cache)
10948 {
10949         int ret = 0;
10950         u64 last;
10951
10952         while (!list_empty(list)) {
10953                 struct root_item_record *rec;
10954                 struct extent_buffer *buf;
10955                 rec = list_entry(list->next,
10956                                  struct root_item_record, list);
10957                 last = 0;
10958                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
10959                 if (!extent_buffer_uptodate(buf)) {
10960                         free_extent_buffer(buf);
10961                         ret = -EIO;
10962                         break;
10963                 }
10964                 ret = add_root_to_pending(buf, extent_cache, pending,
10965                                     seen, nodes, rec->objectid);
10966                 if (ret < 0)
10967                         break;
10968                 /*
10969                  * To rebuild extent tree, we need deal with snapshot
10970                  * one by one, otherwise we deal with node firstly which
10971                  * can maximize readahead.
10972                  */
10973                 while (1) {
10974                         ret = run_next_block(root, bits, bits_nr, &last,
10975                                              pending, seen, reada, nodes,
10976                                              extent_cache, chunk_cache,
10977                                              dev_cache, block_group_cache,
10978                                              dev_extent_cache, rec);
10979                         if (ret != 0)
10980                                 break;
10981                 }
10982                 free_extent_buffer(buf);
10983                 list_del(&rec->list);
10984                 free(rec);
10985                 if (ret < 0)
10986                         break;
10987         }
10988         while (ret >= 0) {
10989                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
10990                                      reada, nodes, extent_cache, chunk_cache,
10991                                      dev_cache, block_group_cache,
10992                                      dev_extent_cache, NULL);
10993                 if (ret != 0) {
10994                         if (ret > 0)
10995                                 ret = 0;
10996                         break;
10997                 }
10998         }
10999         return ret;
11000 }
11001
11002 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11003 {
11004         struct rb_root dev_cache;
11005         struct cache_tree chunk_cache;
11006         struct block_group_tree block_group_cache;
11007         struct device_extent_tree dev_extent_cache;
11008         struct cache_tree extent_cache;
11009         struct cache_tree seen;
11010         struct cache_tree pending;
11011         struct cache_tree reada;
11012         struct cache_tree nodes;
11013         struct extent_io_tree excluded_extents;
11014         struct cache_tree corrupt_blocks;
11015         struct btrfs_path path;
11016         struct btrfs_key key;
11017         struct btrfs_key found_key;
11018         int ret, err = 0;
11019         struct block_info *bits;
11020         int bits_nr;
11021         struct extent_buffer *leaf;
11022         int slot;
11023         struct btrfs_root_item ri;
11024         struct list_head dropping_trees;
11025         struct list_head normal_trees;
11026         struct btrfs_root *root1;
11027         struct btrfs_root *root;
11028         u64 objectid;
11029         u8 level;
11030
11031         root = fs_info->fs_root;
11032         dev_cache = RB_ROOT;
11033         cache_tree_init(&chunk_cache);
11034         block_group_tree_init(&block_group_cache);
11035         device_extent_tree_init(&dev_extent_cache);
11036
11037         cache_tree_init(&extent_cache);
11038         cache_tree_init(&seen);
11039         cache_tree_init(&pending);
11040         cache_tree_init(&nodes);
11041         cache_tree_init(&reada);
11042         cache_tree_init(&corrupt_blocks);
11043         extent_io_tree_init(&excluded_extents);
11044         INIT_LIST_HEAD(&dropping_trees);
11045         INIT_LIST_HEAD(&normal_trees);
11046
11047         if (repair) {
11048                 fs_info->excluded_extents = &excluded_extents;
11049                 fs_info->fsck_extent_cache = &extent_cache;
11050                 fs_info->free_extent_hook = free_extent_hook;
11051                 fs_info->corrupt_blocks = &corrupt_blocks;
11052         }
11053
11054         bits_nr = 1024;
11055         bits = malloc(bits_nr * sizeof(struct block_info));
11056         if (!bits) {
11057                 perror("malloc");
11058                 exit(1);
11059         }
11060
11061         if (ctx.progress_enabled) {
11062                 ctx.tp = TASK_EXTENTS;
11063                 task_start(ctx.info);
11064         }
11065
11066 again:
11067         root1 = fs_info->tree_root;
11068         level = btrfs_header_level(root1->node);
11069         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11070                                     root1->node->start, 0, level, 0, NULL);
11071         if (ret < 0)
11072                 goto out;
11073         root1 = fs_info->chunk_root;
11074         level = btrfs_header_level(root1->node);
11075         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11076                                     root1->node->start, 0, level, 0, NULL);
11077         if (ret < 0)
11078                 goto out;
11079         btrfs_init_path(&path);
11080         key.offset = 0;
11081         key.objectid = 0;
11082         key.type = BTRFS_ROOT_ITEM_KEY;
11083         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11084         if (ret < 0)
11085                 goto out;
11086         while(1) {
11087                 leaf = path.nodes[0];
11088                 slot = path.slots[0];
11089                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11090                         ret = btrfs_next_leaf(root, &path);
11091                         if (ret != 0)
11092                                 break;
11093                         leaf = path.nodes[0];
11094                         slot = path.slots[0];
11095                 }
11096                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11097                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11098                         unsigned long offset;
11099                         u64 last_snapshot;
11100
11101                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11102                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11103                         last_snapshot = btrfs_root_last_snapshot(&ri);
11104                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11105                                 level = btrfs_root_level(&ri);
11106                                 ret = add_root_item_to_list(&normal_trees,
11107                                                 found_key.objectid,
11108                                                 btrfs_root_bytenr(&ri),
11109                                                 last_snapshot, level,
11110                                                 0, NULL);
11111                                 if (ret < 0)
11112                                         goto out;
11113                         } else {
11114                                 level = btrfs_root_level(&ri);
11115                                 objectid = found_key.objectid;
11116                                 btrfs_disk_key_to_cpu(&found_key,
11117                                                       &ri.drop_progress);
11118                                 ret = add_root_item_to_list(&dropping_trees,
11119                                                 objectid,
11120                                                 btrfs_root_bytenr(&ri),
11121                                                 last_snapshot, level,
11122                                                 ri.drop_level, &found_key);
11123                                 if (ret < 0)
11124                                         goto out;
11125                         }
11126                 }
11127                 path.slots[0]++;
11128         }
11129         btrfs_release_path(&path);
11130
11131         /*
11132          * check_block can return -EAGAIN if it fixes something, please keep
11133          * this in mind when dealing with return values from these functions, if
11134          * we get -EAGAIN we want to fall through and restart the loop.
11135          */
11136         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11137                                   &seen, &reada, &nodes, &extent_cache,
11138                                   &chunk_cache, &dev_cache, &block_group_cache,
11139                                   &dev_extent_cache);
11140         if (ret < 0) {
11141                 if (ret == -EAGAIN)
11142                         goto loop;
11143                 goto out;
11144         }
11145         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11146                                   &pending, &seen, &reada, &nodes,
11147                                   &extent_cache, &chunk_cache, &dev_cache,
11148                                   &block_group_cache, &dev_extent_cache);
11149         if (ret < 0) {
11150                 if (ret == -EAGAIN)
11151                         goto loop;
11152                 goto out;
11153         }
11154
11155         ret = check_chunks(&chunk_cache, &block_group_cache,
11156                            &dev_extent_cache, NULL, NULL, NULL, 0);
11157         if (ret) {
11158                 if (ret == -EAGAIN)
11159                         goto loop;
11160                 err = ret;
11161         }
11162
11163         ret = check_extent_refs(root, &extent_cache);
11164         if (ret < 0) {
11165                 if (ret == -EAGAIN)
11166                         goto loop;
11167                 goto out;
11168         }
11169
11170         ret = check_devices(&dev_cache, &dev_extent_cache);
11171         if (ret && err)
11172                 ret = err;
11173
11174 out:
11175         task_stop(ctx.info);
11176         if (repair) {
11177                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11178                 extent_io_tree_cleanup(&excluded_extents);
11179                 fs_info->fsck_extent_cache = NULL;
11180                 fs_info->free_extent_hook = NULL;
11181                 fs_info->corrupt_blocks = NULL;
11182                 fs_info->excluded_extents = NULL;
11183         }
11184         free(bits);
11185         free_chunk_cache_tree(&chunk_cache);
11186         free_device_cache_tree(&dev_cache);
11187         free_block_group_tree(&block_group_cache);
11188         free_device_extent_tree(&dev_extent_cache);
11189         free_extent_cache_tree(&seen);
11190         free_extent_cache_tree(&pending);
11191         free_extent_cache_tree(&reada);
11192         free_extent_cache_tree(&nodes);
11193         free_root_item_list(&normal_trees);
11194         free_root_item_list(&dropping_trees);
11195         return ret;
11196 loop:
11197         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11198         free_extent_cache_tree(&seen);
11199         free_extent_cache_tree(&pending);
11200         free_extent_cache_tree(&reada);
11201         free_extent_cache_tree(&nodes);
11202         free_chunk_cache_tree(&chunk_cache);
11203         free_block_group_tree(&block_group_cache);
11204         free_device_cache_tree(&dev_cache);
11205         free_device_extent_tree(&dev_extent_cache);
11206         free_extent_record_cache(&extent_cache);
11207         free_root_item_list(&normal_trees);
11208         free_root_item_list(&dropping_trees);
11209         extent_io_tree_cleanup(&excluded_extents);
11210         goto again;
11211 }
11212
11213 static int check_extent_inline_ref(struct extent_buffer *eb,
11214                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11215 {
11216         int ret;
11217         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11218
11219         switch (type) {
11220         case BTRFS_TREE_BLOCK_REF_KEY:
11221         case BTRFS_EXTENT_DATA_REF_KEY:
11222         case BTRFS_SHARED_BLOCK_REF_KEY:
11223         case BTRFS_SHARED_DATA_REF_KEY:
11224                 ret = 0;
11225                 break;
11226         default:
11227                 error("extent[%llu %u %llu] has unknown ref type: %d",
11228                       key->objectid, key->type, key->offset, type);
11229                 ret = UNKNOWN_TYPE;
11230                 break;
11231         }
11232
11233         return ret;
11234 }
11235
11236 /*
11237  * Check backrefs of a tree block given by @bytenr or @eb.
11238  *
11239  * @root:       the root containing the @bytenr or @eb
11240  * @eb:         tree block extent buffer, can be NULL
11241  * @bytenr:     bytenr of the tree block to search
11242  * @level:      tree level of the tree block
11243  * @owner:      owner of the tree block
11244  *
11245  * Return >0 for any error found and output error message
11246  * Return 0 for no error found
11247  */
11248 static int check_tree_block_ref(struct btrfs_root *root,
11249                                 struct extent_buffer *eb, u64 bytenr,
11250                                 int level, u64 owner, struct node_refs *nrefs)
11251 {
11252         struct btrfs_key key;
11253         struct btrfs_root *extent_root = root->fs_info->extent_root;
11254         struct btrfs_path path;
11255         struct btrfs_extent_item *ei;
11256         struct btrfs_extent_inline_ref *iref;
11257         struct extent_buffer *leaf;
11258         unsigned long end;
11259         unsigned long ptr;
11260         int slot;
11261         int skinny_level;
11262         int root_level = btrfs_header_level(root->node);
11263         int type;
11264         u32 nodesize = root->fs_info->nodesize;
11265         u32 item_size;
11266         u64 offset;
11267         int found_ref = 0;
11268         int err = 0;
11269         int ret;
11270         int strict = 1;
11271         int parent = 0;
11272
11273         btrfs_init_path(&path);
11274         key.objectid = bytenr;
11275         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11276                 key.type = BTRFS_METADATA_ITEM_KEY;
11277         else
11278                 key.type = BTRFS_EXTENT_ITEM_KEY;
11279         key.offset = (u64)-1;
11280
11281         /* Search for the backref in extent tree */
11282         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11283         if (ret < 0) {
11284                 err |= BACKREF_MISSING;
11285                 goto out;
11286         }
11287         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11288         if (ret) {
11289                 err |= BACKREF_MISSING;
11290                 goto out;
11291         }
11292
11293         leaf = path.nodes[0];
11294         slot = path.slots[0];
11295         btrfs_item_key_to_cpu(leaf, &key, slot);
11296
11297         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11298
11299         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11300                 skinny_level = (int)key.offset;
11301                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11302         } else {
11303                 struct btrfs_tree_block_info *info;
11304
11305                 info = (struct btrfs_tree_block_info *)(ei + 1);
11306                 skinny_level = btrfs_tree_block_level(leaf, info);
11307                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11308         }
11309
11310
11311         if (eb) {
11312                 u64 header_gen;
11313                 u64 extent_gen;
11314
11315                 /*
11316                  * Due to the feature of shared tree blocks, if the upper node
11317                  * is a fs root or shared node, the extent of checked node may
11318                  * not be updated until the next CoW.
11319                  */
11320                 if (nrefs)
11321                         strict = should_check_extent_strictly(root, nrefs,
11322                                         level);
11323                 if (!(btrfs_extent_flags(leaf, ei) &
11324                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11325                         error(
11326                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11327                                 key.objectid, nodesize,
11328                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11329                         err = BACKREF_MISMATCH;
11330                 }
11331                 header_gen = btrfs_header_generation(eb);
11332                 extent_gen = btrfs_extent_generation(leaf, ei);
11333                 if (header_gen != extent_gen) {
11334                         error(
11335         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11336                                 key.objectid, nodesize, header_gen,
11337                                 extent_gen);
11338                         err = BACKREF_MISMATCH;
11339                 }
11340                 if (level != skinny_level) {
11341                         error(
11342                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11343                                 key.objectid, nodesize, level, skinny_level);
11344                         err = BACKREF_MISMATCH;
11345                 }
11346                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11347                         error(
11348                         "extent[%llu %u] is referred by other roots than %llu",
11349                                 key.objectid, nodesize, root->objectid);
11350                         err = BACKREF_MISMATCH;
11351                 }
11352         }
11353
11354         /*
11355          * Iterate the extent/metadata item to find the exact backref
11356          */
11357         item_size = btrfs_item_size_nr(leaf, slot);
11358         ptr = (unsigned long)iref;
11359         end = (unsigned long)ei + item_size;
11360
11361         while (ptr < end) {
11362                 iref = (struct btrfs_extent_inline_ref *)ptr;
11363                 type = btrfs_extent_inline_ref_type(leaf, iref);
11364                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11365
11366                 ret = check_extent_inline_ref(leaf, &key, iref);
11367                 if (ret) {
11368                         err |= ret;
11369                         break;
11370                 }
11371                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11372                         if (offset == root->objectid)
11373                                 found_ref = 1;
11374                         if (!strict && owner == offset)
11375                                 found_ref = 1;
11376                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11377                         /*
11378                          * Backref of tree reloc root points to itself, no need
11379                          * to check backref any more.
11380                          *
11381                          * This may be an error of loop backref, but extent tree
11382                          * checker should have already handled it.
11383                          * Here we only need to avoid infinite iteration.
11384                          */
11385                         if (offset == bytenr) {
11386                                 found_ref = 1;
11387                         } else {
11388                                 /*
11389                                  * Check if the backref points to valid
11390                                  * referencer
11391                                  */
11392                                 found_ref = !check_tree_block_ref( root, NULL,
11393                                                 offset, level + 1, owner,
11394                                                 NULL);
11395                         }
11396                 }
11397
11398                 if (found_ref)
11399                         break;
11400                 ptr += btrfs_extent_inline_ref_size(type);
11401         }
11402
11403         /*
11404          * Inlined extent item doesn't have what we need, check
11405          * TREE_BLOCK_REF_KEY
11406          */
11407         if (!found_ref) {
11408                 btrfs_release_path(&path);
11409                 key.objectid = bytenr;
11410                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11411                 key.offset = root->objectid;
11412
11413                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11414                 if (!ret)
11415                         found_ref = 1;
11416         }
11417         /*
11418          * Finally check SHARED BLOCK REF, any found will be good
11419          * Here we're not doing comprehensive extent backref checking,
11420          * only need to ensure there is some extent referring to this
11421          * tree block.
11422          */
11423         if (!found_ref) {
11424                 btrfs_release_path(&path);
11425                 key.objectid = bytenr;
11426                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11427                 key.offset = (u64)-1;
11428
11429                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11430                 if (ret < 0) {
11431                         err |= BACKREF_MISSING;
11432                         goto out;
11433                 }
11434                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11435                 if (ret) {
11436                         err |= BACKREF_MISSING;
11437                         goto out;
11438                 }
11439                 found_ref = 1;
11440         }
11441         if (!found_ref)
11442                 err |= BACKREF_MISSING;
11443 out:
11444         btrfs_release_path(&path);
11445         if (nrefs && strict &&
11446             level < root_level && nrefs->full_backref[level + 1])
11447                 parent = nrefs->bytenr[level + 1];
11448         if (eb && (err & BACKREF_MISSING))
11449                 error(
11450         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11451                       bytenr, nodesize, owner, level,
11452                       parent ? "parent" : "root",
11453                       parent ? parent : root->objectid);
11454         return err;
11455 }
11456
11457 /*
11458  * If @err contains BACKREF_MISSING then add extent of the
11459  * file_extent_data_item.
11460  *
11461  * Returns error bits after reapir.
11462  */
11463 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11464                                    struct btrfs_root *root,
11465                                    struct btrfs_path *pathp,
11466                                    struct node_refs *nrefs,
11467                                    int err)
11468 {
11469         struct btrfs_file_extent_item *fi;
11470         struct btrfs_key fi_key;
11471         struct btrfs_key key;
11472         struct btrfs_extent_item *ei;
11473         struct btrfs_path path;
11474         struct btrfs_root *extent_root = root->fs_info->extent_root;
11475         struct extent_buffer *eb;
11476         u64 size;
11477         u64 disk_bytenr;
11478         u64 num_bytes;
11479         u64 parent;
11480         u64 offset;
11481         u64 extent_offset;
11482         u64 file_offset;
11483         int generation;
11484         int slot;
11485         int ret = 0;
11486
11487         eb = pathp->nodes[0];
11488         slot = pathp->slots[0];
11489         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11490         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11491
11492         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11493             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11494                 return err;
11495
11496         file_offset = fi_key.offset;
11497         generation = btrfs_file_extent_generation(eb, fi);
11498         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11499         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11500         extent_offset = btrfs_file_extent_offset(eb, fi);
11501         offset = file_offset - extent_offset;
11502
11503         /* now repair only adds backref */
11504         if ((err & BACKREF_MISSING) == 0)
11505                 return err;
11506
11507         /* search extent item */
11508         key.objectid = disk_bytenr;
11509         key.type = BTRFS_EXTENT_ITEM_KEY;
11510         key.offset = num_bytes;
11511
11512         btrfs_init_path(&path);
11513         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11514         if (ret < 0) {
11515                 ret = -EIO;
11516                 goto out;
11517         }
11518
11519         /* insert an extent item */
11520         if (ret > 0) {
11521                 key.objectid = disk_bytenr;
11522                 key.type = BTRFS_EXTENT_ITEM_KEY;
11523                 key.offset = num_bytes;
11524                 size = sizeof(*ei);
11525
11526                 btrfs_release_path(&path);
11527                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11528                                               size);
11529                 if (ret)
11530                         goto out;
11531                 eb = path.nodes[0];
11532                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11533
11534                 btrfs_set_extent_refs(eb, ei, 0);
11535                 btrfs_set_extent_generation(eb, ei, generation);
11536                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11537
11538                 btrfs_mark_buffer_dirty(eb);
11539                 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11540                                                num_bytes, 1, 0);
11541                 btrfs_release_path(&path);
11542         }
11543
11544         if (nrefs->full_backref[0])
11545                 parent = btrfs_header_bytenr(eb);
11546         else
11547                 parent = 0;
11548
11549         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11550                                    root->objectid,
11551                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11552                                    offset);
11553         if (ret) {
11554                 error(
11555                 "failed to increase extent data backref[%llu %llu] root %llu",
11556                       disk_bytenr, num_bytes, root->objectid);
11557                 goto out;
11558         } else {
11559                 printf("Add one extent data backref [%llu %llu]\n",
11560                        disk_bytenr, num_bytes);
11561         }
11562
11563         err &= ~BACKREF_MISSING;
11564 out:
11565         if (ret)
11566                 error("can't repair root %llu extent data item[%llu %llu]",
11567                       root->objectid, disk_bytenr, num_bytes);
11568         return err;
11569 }
11570
11571 /*
11572  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11573  *
11574  * Return >0 any error found and output error message
11575  * Return 0 for no error found
11576  */
11577 static int check_extent_data_item(struct btrfs_root *root,
11578                                   struct btrfs_path *pathp,
11579                                   struct node_refs *nrefs,  int account_bytes)
11580 {
11581         struct btrfs_file_extent_item *fi;
11582         struct extent_buffer *eb = pathp->nodes[0];
11583         struct btrfs_path path;
11584         struct btrfs_root *extent_root = root->fs_info->extent_root;
11585         struct btrfs_key fi_key;
11586         struct btrfs_key dbref_key;
11587         struct extent_buffer *leaf;
11588         struct btrfs_extent_item *ei;
11589         struct btrfs_extent_inline_ref *iref;
11590         struct btrfs_extent_data_ref *dref;
11591         u64 owner;
11592         u64 disk_bytenr;
11593         u64 disk_num_bytes;
11594         u64 extent_num_bytes;
11595         u64 extent_flags;
11596         u64 offset;
11597         u32 item_size;
11598         unsigned long end;
11599         unsigned long ptr;
11600         int type;
11601         int found_dbackref = 0;
11602         int slot = pathp->slots[0];
11603         int err = 0;
11604         int ret;
11605         int strict;
11606
11607         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11608         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11609
11610         /* Nothing to check for hole and inline data extents */
11611         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11612             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11613                 return 0;
11614
11615         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11616         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11617         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11618         offset = btrfs_file_extent_offset(eb, fi);
11619
11620         /* Check unaligned disk_num_bytes and num_bytes */
11621         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11622                 error(
11623 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11624                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11625                         root->fs_info->sectorsize);
11626                 err |= BYTES_UNALIGNED;
11627         } else if (account_bytes) {
11628                 data_bytes_allocated += disk_num_bytes;
11629         }
11630         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11631                 error(
11632 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11633                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11634                         root->fs_info->sectorsize);
11635                 err |= BYTES_UNALIGNED;
11636         } else if (account_bytes) {
11637                 data_bytes_referenced += extent_num_bytes;
11638         }
11639         owner = btrfs_header_owner(eb);
11640
11641         /* Check the extent item of the file extent in extent tree */
11642         btrfs_init_path(&path);
11643         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11644         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11645         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11646
11647         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11648         if (ret)
11649                 goto out;
11650
11651         leaf = path.nodes[0];
11652         slot = path.slots[0];
11653         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11654
11655         extent_flags = btrfs_extent_flags(leaf, ei);
11656
11657         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11658                 error(
11659                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11660                     disk_bytenr, disk_num_bytes,
11661                     BTRFS_EXTENT_FLAG_DATA);
11662                 err |= BACKREF_MISMATCH;
11663         }
11664
11665         /* Check data backref inside that extent item */
11666         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11667         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11668         ptr = (unsigned long)iref;
11669         end = (unsigned long)ei + item_size;
11670         strict = should_check_extent_strictly(root, nrefs, -1);
11671
11672         while (ptr < end) {
11673                 u64 ref_root;
11674                 u64 ref_objectid;
11675                 u64 ref_offset;
11676                 bool match = false;
11677
11678                 iref = (struct btrfs_extent_inline_ref *)ptr;
11679                 type = btrfs_extent_inline_ref_type(leaf, iref);
11680                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11681
11682                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11683                 if (ret) {
11684                         err |= ret;
11685                         break;
11686                 }
11687                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11688                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11689                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11690                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
11691
11692                         if (ref_objectid == fi_key.objectid &&
11693                             ref_offset == fi_key.offset - offset)
11694                                 match = true;
11695                         if (ref_root == root->objectid && match)
11696                                 found_dbackref = 1;
11697                         else if (!strict && owner == ref_root && match)
11698                                 found_dbackref = 1;
11699                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11700                         found_dbackref = !check_tree_block_ref(root, NULL,
11701                                 btrfs_extent_inline_ref_offset(leaf, iref),
11702                                 0, owner, NULL);
11703                 }
11704
11705                 if (found_dbackref)
11706                         break;
11707                 ptr += btrfs_extent_inline_ref_size(type);
11708         }
11709
11710         if (!found_dbackref) {
11711                 btrfs_release_path(&path);
11712
11713                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11714                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11715                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11716                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11717                                 fi_key.objectid, fi_key.offset - offset);
11718
11719                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11720                                         &dbref_key, &path, 0, 0);
11721                 if (!ret) {
11722                         found_dbackref = 1;
11723                         goto out;
11724                 }
11725
11726                 btrfs_release_path(&path);
11727
11728                 /*
11729                  * Neither inlined nor EXTENT_DATA_REF found, try
11730                  * SHARED_DATA_REF as last chance.
11731                  */
11732                 dbref_key.objectid = disk_bytenr;
11733                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11734                 dbref_key.offset = eb->start;
11735
11736                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11737                                         &dbref_key, &path, 0, 0);
11738                 if (!ret) {
11739                         found_dbackref = 1;
11740                         goto out;
11741                 }
11742         }
11743
11744 out:
11745         if (!found_dbackref)
11746                 err |= BACKREF_MISSING;
11747         btrfs_release_path(&path);
11748         if (err & BACKREF_MISSING) {
11749                 error("data extent[%llu %llu] backref lost",
11750                       disk_bytenr, disk_num_bytes);
11751         }
11752         return err;
11753 }
11754
11755 /*
11756  * Get real tree block level for the case like shared block
11757  * Return >= 0 as tree level
11758  * Return <0 for error
11759  */
11760 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11761 {
11762         struct extent_buffer *eb;
11763         struct btrfs_path path;
11764         struct btrfs_key key;
11765         struct btrfs_extent_item *ei;
11766         u64 flags;
11767         u64 transid;
11768         u8 backref_level;
11769         u8 header_level;
11770         int ret;
11771
11772         /* Search extent tree for extent generation and level */
11773         key.objectid = bytenr;
11774         key.type = BTRFS_METADATA_ITEM_KEY;
11775         key.offset = (u64)-1;
11776
11777         btrfs_init_path(&path);
11778         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11779         if (ret < 0)
11780                 goto release_out;
11781         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11782         if (ret < 0)
11783                 goto release_out;
11784         if (ret > 0) {
11785                 ret = -ENOENT;
11786                 goto release_out;
11787         }
11788
11789         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11790         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11791                             struct btrfs_extent_item);
11792         flags = btrfs_extent_flags(path.nodes[0], ei);
11793         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11794                 ret = -ENOENT;
11795                 goto release_out;
11796         }
11797
11798         /* Get transid for later read_tree_block() check */
11799         transid = btrfs_extent_generation(path.nodes[0], ei);
11800
11801         /* Get backref level as one source */
11802         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11803                 backref_level = key.offset;
11804         } else {
11805                 struct btrfs_tree_block_info *info;
11806
11807                 info = (struct btrfs_tree_block_info *)(ei + 1);
11808                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11809         }
11810         btrfs_release_path(&path);
11811
11812         /* Get level from tree block as an alternative source */
11813         eb = read_tree_block(fs_info, bytenr, transid);
11814         if (!extent_buffer_uptodate(eb)) {
11815                 free_extent_buffer(eb);
11816                 return -EIO;
11817         }
11818         header_level = btrfs_header_level(eb);
11819         free_extent_buffer(eb);
11820
11821         if (header_level != backref_level)
11822                 return -EIO;
11823         return header_level;
11824
11825 release_out:
11826         btrfs_release_path(&path);
11827         return ret;
11828 }
11829
11830 /*
11831  * Check if a tree block backref is valid (points to a valid tree block)
11832  * if level == -1, level will be resolved
11833  * Return >0 for any error found and print error message
11834  */
11835 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11836                                     u64 bytenr, int level)
11837 {
11838         struct btrfs_root *root;
11839         struct btrfs_key key;
11840         struct btrfs_path path;
11841         struct extent_buffer *eb;
11842         struct extent_buffer *node;
11843         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11844         int err = 0;
11845         int ret;
11846
11847         /* Query level for level == -1 special case */
11848         if (level == -1)
11849                 level = query_tree_block_level(fs_info, bytenr);
11850         if (level < 0) {
11851                 err |= REFERENCER_MISSING;
11852                 goto out;
11853         }
11854
11855         key.objectid = root_id;
11856         key.type = BTRFS_ROOT_ITEM_KEY;
11857         key.offset = (u64)-1;
11858
11859         root = btrfs_read_fs_root(fs_info, &key);
11860         if (IS_ERR(root)) {
11861                 err |= REFERENCER_MISSING;
11862                 goto out;
11863         }
11864
11865         /* Read out the tree block to get item/node key */
11866         eb = read_tree_block(fs_info, bytenr, 0);
11867         if (!extent_buffer_uptodate(eb)) {
11868                 err |= REFERENCER_MISSING;
11869                 free_extent_buffer(eb);
11870                 goto out;
11871         }
11872
11873         /* Empty tree, no need to check key */
11874         if (!btrfs_header_nritems(eb) && !level) {
11875                 free_extent_buffer(eb);
11876                 goto out;
11877         }
11878
11879         if (level)
11880                 btrfs_node_key_to_cpu(eb, &key, 0);
11881         else
11882                 btrfs_item_key_to_cpu(eb, &key, 0);
11883
11884         free_extent_buffer(eb);
11885
11886         btrfs_init_path(&path);
11887         path.lowest_level = level;
11888         /* Search with the first key, to ensure we can reach it */
11889         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
11890         if (ret < 0) {
11891                 err |= REFERENCER_MISSING;
11892                 goto release_out;
11893         }
11894
11895         node = path.nodes[level];
11896         if (btrfs_header_bytenr(node) != bytenr) {
11897                 error(
11898         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
11899                         bytenr, nodesize, bytenr,
11900                         btrfs_header_bytenr(node));
11901                 err |= REFERENCER_MISMATCH;
11902         }
11903         if (btrfs_header_level(node) != level) {
11904                 error(
11905         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
11906                         bytenr, nodesize, level,
11907                         btrfs_header_level(node));
11908                 err |= REFERENCER_MISMATCH;
11909         }
11910
11911 release_out:
11912         btrfs_release_path(&path);
11913 out:
11914         if (err & REFERENCER_MISSING) {
11915                 if (level < 0)
11916                         error("extent [%llu %d] lost referencer (owner: %llu)",
11917                                 bytenr, nodesize, root_id);
11918                 else
11919                         error(
11920                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
11921                                 bytenr, nodesize, root_id, level);
11922         }
11923
11924         return err;
11925 }
11926
11927 /*
11928  * Check if tree block @eb is tree reloc root.
11929  * Return 0 if it's not or any problem happens
11930  * Return 1 if it's a tree reloc root
11931  */
11932 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
11933                                  struct extent_buffer *eb)
11934 {
11935         struct btrfs_root *tree_reloc_root;
11936         struct btrfs_key key;
11937         u64 bytenr = btrfs_header_bytenr(eb);
11938         u64 owner = btrfs_header_owner(eb);
11939         int ret = 0;
11940
11941         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
11942         key.offset = owner;
11943         key.type = BTRFS_ROOT_ITEM_KEY;
11944
11945         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
11946         if (IS_ERR(tree_reloc_root))
11947                 return 0;
11948
11949         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
11950                 ret = 1;
11951         btrfs_free_fs_root(tree_reloc_root);
11952         return ret;
11953 }
11954
11955 /*
11956  * Check referencer for shared block backref
11957  * If level == -1, this function will resolve the level.
11958  */
11959 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
11960                                      u64 parent, u64 bytenr, int level)
11961 {
11962         struct extent_buffer *eb;
11963         u32 nr;
11964         int found_parent = 0;
11965         int i;
11966
11967         eb = read_tree_block(fs_info, parent, 0);
11968         if (!extent_buffer_uptodate(eb))
11969                 goto out;
11970
11971         if (level == -1)
11972                 level = query_tree_block_level(fs_info, bytenr);
11973         if (level < 0)
11974                 goto out;
11975
11976         /* It's possible it's a tree reloc root */
11977         if (parent == bytenr) {
11978                 if (is_tree_reloc_root(fs_info, eb))
11979                         found_parent = 1;
11980                 goto out;
11981         }
11982
11983         if (level + 1 != btrfs_header_level(eb))
11984                 goto out;
11985
11986         nr = btrfs_header_nritems(eb);
11987         for (i = 0; i < nr; i++) {
11988                 if (bytenr == btrfs_node_blockptr(eb, i)) {
11989                         found_parent = 1;
11990                         break;
11991                 }
11992         }
11993 out:
11994         free_extent_buffer(eb);
11995         if (!found_parent) {
11996                 error(
11997         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
11998                         bytenr, fs_info->nodesize, parent, level);
11999                 return REFERENCER_MISSING;
12000         }
12001         return 0;
12002 }
12003
12004 /*
12005  * Check referencer for normal (inlined) data ref
12006  * If len == 0, it will be resolved by searching in extent tree
12007  */
12008 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12009                                      u64 root_id, u64 objectid, u64 offset,
12010                                      u64 bytenr, u64 len, u32 count)
12011 {
12012         struct btrfs_root *root;
12013         struct btrfs_root *extent_root = fs_info->extent_root;
12014         struct btrfs_key key;
12015         struct btrfs_path path;
12016         struct extent_buffer *leaf;
12017         struct btrfs_file_extent_item *fi;
12018         u32 found_count = 0;
12019         int slot;
12020         int ret = 0;
12021
12022         if (!len) {
12023                 key.objectid = bytenr;
12024                 key.type = BTRFS_EXTENT_ITEM_KEY;
12025                 key.offset = (u64)-1;
12026
12027                 btrfs_init_path(&path);
12028                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12029                 if (ret < 0)
12030                         goto out;
12031                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12032                 if (ret)
12033                         goto out;
12034                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12035                 if (key.objectid != bytenr ||
12036                     key.type != BTRFS_EXTENT_ITEM_KEY)
12037                         goto out;
12038                 len = key.offset;
12039                 btrfs_release_path(&path);
12040         }
12041         key.objectid = root_id;
12042         key.type = BTRFS_ROOT_ITEM_KEY;
12043         key.offset = (u64)-1;
12044         btrfs_init_path(&path);
12045
12046         root = btrfs_read_fs_root(fs_info, &key);
12047         if (IS_ERR(root))
12048                 goto out;
12049
12050         key.objectid = objectid;
12051         key.type = BTRFS_EXTENT_DATA_KEY;
12052         /*
12053          * It can be nasty as data backref offset is
12054          * file offset - file extent offset, which is smaller or
12055          * equal to original backref offset.  The only special case is
12056          * overflow.  So we need to special check and do further search.
12057          */
12058         key.offset = offset & (1ULL << 63) ? 0 : offset;
12059
12060         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12061         if (ret < 0)
12062                 goto out;
12063
12064         /*
12065          * Search afterwards to get correct one
12066          * NOTE: As we must do a comprehensive check on the data backref to
12067          * make sure the dref count also matches, we must iterate all file
12068          * extents for that inode.
12069          */
12070         while (1) {
12071                 leaf = path.nodes[0];
12072                 slot = path.slots[0];
12073
12074                 if (slot >= btrfs_header_nritems(leaf) ||
12075                     btrfs_header_owner(leaf) != root_id)
12076                         goto next;
12077                 btrfs_item_key_to_cpu(leaf, &key, slot);
12078                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12079                         break;
12080                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12081                 /*
12082                  * Except normal disk bytenr and disk num bytes, we still
12083                  * need to do extra check on dbackref offset as
12084                  * dbackref offset = file_offset - file_extent_offset
12085                  *
12086                  * Also, we must check the leaf owner.
12087                  * In case of shared tree blocks (snapshots) we can inherit
12088                  * leaves from source snapshot.
12089                  * In that case, reference from source snapshot should not
12090                  * count.
12091                  */
12092                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12093                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12094                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12095                     offset && btrfs_header_owner(leaf) == root_id)
12096                         found_count++;
12097
12098 next:
12099                 ret = btrfs_next_item(root, &path);
12100                 if (ret)
12101                         break;
12102         }
12103 out:
12104         btrfs_release_path(&path);
12105         if (found_count != count) {
12106                 error(
12107 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12108                         bytenr, len, root_id, objectid, offset, count, found_count);
12109                 return REFERENCER_MISSING;
12110         }
12111         return 0;
12112 }
12113
12114 /*
12115  * Check if the referencer of a shared data backref exists
12116  */
12117 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12118                                      u64 parent, u64 bytenr)
12119 {
12120         struct extent_buffer *eb;
12121         struct btrfs_key key;
12122         struct btrfs_file_extent_item *fi;
12123         u32 nr;
12124         int found_parent = 0;
12125         int i;
12126
12127         eb = read_tree_block(fs_info, parent, 0);
12128         if (!extent_buffer_uptodate(eb))
12129                 goto out;
12130
12131         nr = btrfs_header_nritems(eb);
12132         for (i = 0; i < nr; i++) {
12133                 btrfs_item_key_to_cpu(eb, &key, i);
12134                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12135                         continue;
12136
12137                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12138                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12139                         continue;
12140
12141                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12142                         found_parent = 1;
12143                         break;
12144                 }
12145         }
12146
12147 out:
12148         free_extent_buffer(eb);
12149         if (!found_parent) {
12150                 error("shared extent %llu referencer lost (parent: %llu)",
12151                         bytenr, parent);
12152                 return REFERENCER_MISSING;
12153         }
12154         return 0;
12155 }
12156
12157 /*
12158  * Only delete backref if REFERENCER_MISSING now
12159  *
12160  * Returns <0   the extent was deleted
12161  * Returns >0   the backref was deleted but extent still exists, returned value
12162  *               means error after repair
12163  * Returns  0   nothing happened
12164  */
12165 static int repair_extent_item(struct btrfs_trans_handle *trans,
12166                       struct btrfs_root *root, struct btrfs_path *path,
12167                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12168                       u64 owner, u64 offset, int err)
12169 {
12170         struct btrfs_key old_key;
12171         int freed = 0;
12172         int ret;
12173
12174         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12175
12176         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12177                 /* delete the backref */
12178                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12179                           num_bytes, parent, root_objectid, owner, offset);
12180                 if (!ret) {
12181                         freed = 1;
12182                         err &= ~REFERENCER_MISSING;
12183                         printf("Delete backref in extent [%llu %llu]\n",
12184                                bytenr, num_bytes);
12185                 } else {
12186                         error("fail to delete backref in extent [%llu %llu]",
12187                                bytenr, num_bytes);
12188                 }
12189         }
12190
12191         /* btrfs_free_extent may delete the extent */
12192         btrfs_release_path(path);
12193         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12194
12195         if (ret)
12196                 ret = -ENOENT;
12197         else if (freed)
12198                 ret = err;
12199         return ret;
12200 }
12201
12202 /*
12203  * This function will check a given extent item, including its backref and
12204  * itself (like crossing stripe boundary and type)
12205  *
12206  * Since we don't use extent_record anymore, introduce new error bit
12207  */
12208 static int check_extent_item(struct btrfs_trans_handle *trans,
12209                              struct btrfs_fs_info *fs_info,
12210                              struct btrfs_path *path)
12211 {
12212         struct btrfs_extent_item *ei;
12213         struct btrfs_extent_inline_ref *iref;
12214         struct btrfs_extent_data_ref *dref;
12215         struct extent_buffer *eb = path->nodes[0];
12216         unsigned long end;
12217         unsigned long ptr;
12218         int slot = path->slots[0];
12219         int type;
12220         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12221         u32 item_size = btrfs_item_size_nr(eb, slot);
12222         u64 flags;
12223         u64 offset;
12224         u64 parent;
12225         u64 num_bytes;
12226         u64 root_objectid;
12227         u64 owner;
12228         u64 owner_offset;
12229         int metadata = 0;
12230         int level;
12231         struct btrfs_key key;
12232         int ret;
12233         int err = 0;
12234
12235         btrfs_item_key_to_cpu(eb, &key, slot);
12236         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12237                 bytes_used += key.offset;
12238                 num_bytes = key.offset;
12239         } else {
12240                 bytes_used += nodesize;
12241                 num_bytes = nodesize;
12242         }
12243
12244         if (item_size < sizeof(*ei)) {
12245                 /*
12246                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12247                  * old thing when on disk format is still un-determined.
12248                  * No need to care about it anymore
12249                  */
12250                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12251                 return -ENOTTY;
12252         }
12253
12254         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12255         flags = btrfs_extent_flags(eb, ei);
12256
12257         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12258                 metadata = 1;
12259         if (metadata && check_crossing_stripes(global_info, key.objectid,
12260                                                eb->len)) {
12261                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12262                       key.objectid, key.objectid + nodesize);
12263                 err |= CROSSING_STRIPE_BOUNDARY;
12264         }
12265
12266         ptr = (unsigned long)(ei + 1);
12267
12268         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12269                 /* Old EXTENT_ITEM metadata */
12270                 struct btrfs_tree_block_info *info;
12271
12272                 info = (struct btrfs_tree_block_info *)ptr;
12273                 level = btrfs_tree_block_level(eb, info);
12274                 ptr += sizeof(struct btrfs_tree_block_info);
12275         } else {
12276                 /* New METADATA_ITEM */
12277                 level = key.offset;
12278         }
12279         end = (unsigned long)ei + item_size;
12280
12281 next:
12282         /* Reached extent item end normally */
12283         if (ptr == end)
12284                 goto out;
12285
12286         /* Beyond extent item end, wrong item size */
12287         if (ptr > end) {
12288                 err |= ITEM_SIZE_MISMATCH;
12289                 error("extent item at bytenr %llu slot %d has wrong size",
12290                         eb->start, slot);
12291                 goto out;
12292         }
12293
12294         parent = 0;
12295         root_objectid = 0;
12296         owner = 0;
12297         owner_offset = 0;
12298         /* Now check every backref in this extent item */
12299         iref = (struct btrfs_extent_inline_ref *)ptr;
12300         type = btrfs_extent_inline_ref_type(eb, iref);
12301         offset = btrfs_extent_inline_ref_offset(eb, iref);
12302         switch (type) {
12303         case BTRFS_TREE_BLOCK_REF_KEY:
12304                 root_objectid = offset;
12305                 owner = level;
12306                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12307                                                level);
12308                 err |= ret;
12309                 break;
12310         case BTRFS_SHARED_BLOCK_REF_KEY:
12311                 parent = offset;
12312                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12313                                                  level);
12314                 err |= ret;
12315                 break;
12316         case BTRFS_EXTENT_DATA_REF_KEY:
12317                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12318                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12319                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12320                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12321                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12322                                         owner_offset, key.objectid, key.offset,
12323                                         btrfs_extent_data_ref_count(eb, dref));
12324                 err |= ret;
12325                 break;
12326         case BTRFS_SHARED_DATA_REF_KEY:
12327                 parent = offset;
12328                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12329                 err |= ret;
12330                 break;
12331         default:
12332                 error("extent[%llu %d %llu] has unknown ref type: %d",
12333                         key.objectid, key.type, key.offset, type);
12334                 ret = UNKNOWN_TYPE;
12335                 err |= ret;
12336                 goto out;
12337         }
12338
12339         if (err && repair) {
12340                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12341                          key.objectid, num_bytes, parent, root_objectid,
12342                          owner, owner_offset, ret);
12343                 if (ret < 0)
12344                         goto out;
12345                 if (ret) {
12346                         goto next;
12347                         err = ret;
12348                 }
12349         }
12350
12351         ptr += btrfs_extent_inline_ref_size(type);
12352         goto next;
12353
12354 out:
12355         return err;
12356 }
12357
12358 /*
12359  * Check if a dev extent item is referred correctly by its chunk
12360  */
12361 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12362                                  struct extent_buffer *eb, int slot)
12363 {
12364         struct btrfs_root *chunk_root = fs_info->chunk_root;
12365         struct btrfs_dev_extent *ptr;
12366         struct btrfs_path path;
12367         struct btrfs_key chunk_key;
12368         struct btrfs_key devext_key;
12369         struct btrfs_chunk *chunk;
12370         struct extent_buffer *l;
12371         int num_stripes;
12372         u64 length;
12373         int i;
12374         int found_chunk = 0;
12375         int ret;
12376
12377         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12378         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12379         length = btrfs_dev_extent_length(eb, ptr);
12380
12381         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12382         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12383         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12384
12385         btrfs_init_path(&path);
12386         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12387         if (ret)
12388                 goto out;
12389
12390         l = path.nodes[0];
12391         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12392         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12393                                       chunk_key.offset);
12394         if (ret < 0)
12395                 goto out;
12396
12397         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12398                 goto out;
12399
12400         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12401         for (i = 0; i < num_stripes; i++) {
12402                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12403                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12404
12405                 if (devid == devext_key.objectid &&
12406                     offset == devext_key.offset) {
12407                         found_chunk = 1;
12408                         break;
12409                 }
12410         }
12411 out:
12412         btrfs_release_path(&path);
12413         if (!found_chunk) {
12414                 error(
12415                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12416                         devext_key.objectid, devext_key.offset, length);
12417                 return REFERENCER_MISSING;
12418         }
12419         return 0;
12420 }
12421
12422 /*
12423  * Check if the used space is correct with the dev item
12424  */
12425 static int check_dev_item(struct btrfs_fs_info *fs_info,
12426                           struct extent_buffer *eb, int slot)
12427 {
12428         struct btrfs_root *dev_root = fs_info->dev_root;
12429         struct btrfs_dev_item *dev_item;
12430         struct btrfs_path path;
12431         struct btrfs_key key;
12432         struct btrfs_dev_extent *ptr;
12433         u64 total_bytes;
12434         u64 dev_id;
12435         u64 used;
12436         u64 total = 0;
12437         int ret;
12438
12439         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12440         dev_id = btrfs_device_id(eb, dev_item);
12441         used = btrfs_device_bytes_used(eb, dev_item);
12442         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12443
12444         key.objectid = dev_id;
12445         key.type = BTRFS_DEV_EXTENT_KEY;
12446         key.offset = 0;
12447
12448         btrfs_init_path(&path);
12449         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12450         if (ret < 0) {
12451                 btrfs_item_key_to_cpu(eb, &key, slot);
12452                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12453                         key.objectid, key.type, key.offset);
12454                 btrfs_release_path(&path);
12455                 return REFERENCER_MISSING;
12456         }
12457
12458         /* Iterate dev_extents to calculate the used space of a device */
12459         while (1) {
12460                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12461                         goto next;
12462
12463                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12464                 if (key.objectid > dev_id)
12465                         break;
12466                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12467                         goto next;
12468
12469                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12470                                      struct btrfs_dev_extent);
12471                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12472 next:
12473                 ret = btrfs_next_item(dev_root, &path);
12474                 if (ret)
12475                         break;
12476         }
12477         btrfs_release_path(&path);
12478
12479         if (used != total) {
12480                 btrfs_item_key_to_cpu(eb, &key, slot);
12481                 error(
12482 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12483                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12484                         BTRFS_DEV_EXTENT_KEY, dev_id);
12485                 return ACCOUNTING_MISMATCH;
12486         }
12487         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12488
12489         return 0;
12490 }
12491
12492 /*
12493  * Check a block group item with its referener (chunk) and its used space
12494  * with extent/metadata item
12495  */
12496 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12497                                   struct extent_buffer *eb, int slot)
12498 {
12499         struct btrfs_root *extent_root = fs_info->extent_root;
12500         struct btrfs_root *chunk_root = fs_info->chunk_root;
12501         struct btrfs_block_group_item *bi;
12502         struct btrfs_block_group_item bg_item;
12503         struct btrfs_path path;
12504         struct btrfs_key bg_key;
12505         struct btrfs_key chunk_key;
12506         struct btrfs_key extent_key;
12507         struct btrfs_chunk *chunk;
12508         struct extent_buffer *leaf;
12509         struct btrfs_extent_item *ei;
12510         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12511         u64 flags;
12512         u64 bg_flags;
12513         u64 used;
12514         u64 total = 0;
12515         int ret;
12516         int err = 0;
12517
12518         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12519         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12520         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12521         used = btrfs_block_group_used(&bg_item);
12522         bg_flags = btrfs_block_group_flags(&bg_item);
12523
12524         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12525         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12526         chunk_key.offset = bg_key.objectid;
12527
12528         btrfs_init_path(&path);
12529         /* Search for the referencer chunk */
12530         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12531         if (ret) {
12532                 error(
12533                 "block group[%llu %llu] did not find the related chunk item",
12534                         bg_key.objectid, bg_key.offset);
12535                 err |= REFERENCER_MISSING;
12536         } else {
12537                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12538                                         struct btrfs_chunk);
12539                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12540                                                 bg_key.offset) {
12541                         error(
12542         "block group[%llu %llu] related chunk item length does not match",
12543                                 bg_key.objectid, bg_key.offset);
12544                         err |= REFERENCER_MISMATCH;
12545                 }
12546         }
12547         btrfs_release_path(&path);
12548
12549         /* Search from the block group bytenr */
12550         extent_key.objectid = bg_key.objectid;
12551         extent_key.type = 0;
12552         extent_key.offset = 0;
12553
12554         btrfs_init_path(&path);
12555         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12556         if (ret < 0)
12557                 goto out;
12558
12559         /* Iterate extent tree to account used space */
12560         while (1) {
12561                 leaf = path.nodes[0];
12562
12563                 /* Search slot can point to the last item beyond leaf nritems */
12564                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12565                         goto next;
12566
12567                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12568                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12569                         break;
12570
12571                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12572                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12573                         goto next;
12574                 if (extent_key.objectid < bg_key.objectid)
12575                         goto next;
12576
12577                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12578                         total += nodesize;
12579                 else
12580                         total += extent_key.offset;
12581
12582                 ei = btrfs_item_ptr(leaf, path.slots[0],
12583                                     struct btrfs_extent_item);
12584                 flags = btrfs_extent_flags(leaf, ei);
12585                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12586                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12587                                 error(
12588                         "bad extent[%llu, %llu) type mismatch with chunk",
12589                                         extent_key.objectid,
12590                                         extent_key.objectid + extent_key.offset);
12591                                 err |= CHUNK_TYPE_MISMATCH;
12592                         }
12593                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12594                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12595                                     BTRFS_BLOCK_GROUP_METADATA))) {
12596                                 error(
12597                         "bad extent[%llu, %llu) type mismatch with chunk",
12598                                         extent_key.objectid,
12599                                         extent_key.objectid + nodesize);
12600                                 err |= CHUNK_TYPE_MISMATCH;
12601                         }
12602                 }
12603 next:
12604                 ret = btrfs_next_item(extent_root, &path);
12605                 if (ret)
12606                         break;
12607         }
12608
12609 out:
12610         btrfs_release_path(&path);
12611
12612         if (total != used) {
12613                 error(
12614                 "block group[%llu %llu] used %llu but extent items used %llu",
12615                         bg_key.objectid, bg_key.offset, used, total);
12616                 err |= BG_ACCOUNTING_ERROR;
12617         }
12618         return err;
12619 }
12620
12621 /*
12622  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12623  * FIXME: We still need to repair error of dev_item.
12624  *
12625  * Returns error after repair.
12626  */
12627 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12628                              struct btrfs_root *chunk_root,
12629                              struct btrfs_path *path, int err)
12630 {
12631         struct btrfs_chunk *chunk;
12632         struct btrfs_key chunk_key;
12633         struct extent_buffer *eb = path->nodes[0];
12634         u64 length;
12635         int slot = path->slots[0];
12636         u64 type;
12637         int ret = 0;
12638
12639         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12640         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12641                 return err;
12642         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12643         type = btrfs_chunk_type(path->nodes[0], chunk);
12644         length = btrfs_chunk_length(eb, chunk);
12645
12646         if (err & REFERENCER_MISSING) {
12647                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12648                                              type, chunk_key.offset, length);
12649                 if (ret) {
12650                         error("fail to add block group item[%llu %llu]",
12651                               chunk_key.offset, length);
12652                         goto out;
12653                 } else {
12654                         err &= ~REFERENCER_MISSING;
12655                         printf("Added block group item[%llu %llu]\n",
12656                                chunk_key.offset, length);
12657                 }
12658         }
12659
12660 out:
12661         return err;
12662 }
12663
12664 /*
12665  * Check a chunk item.
12666  * Including checking all referred dev_extents and block group
12667  */
12668 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12669                             struct extent_buffer *eb, int slot)
12670 {
12671         struct btrfs_root *extent_root = fs_info->extent_root;
12672         struct btrfs_root *dev_root = fs_info->dev_root;
12673         struct btrfs_path path;
12674         struct btrfs_key chunk_key;
12675         struct btrfs_key bg_key;
12676         struct btrfs_key devext_key;
12677         struct btrfs_chunk *chunk;
12678         struct extent_buffer *leaf;
12679         struct btrfs_block_group_item *bi;
12680         struct btrfs_block_group_item bg_item;
12681         struct btrfs_dev_extent *ptr;
12682         u64 length;
12683         u64 chunk_end;
12684         u64 stripe_len;
12685         u64 type;
12686         int num_stripes;
12687         u64 offset;
12688         u64 objectid;
12689         int i;
12690         int ret;
12691         int err = 0;
12692
12693         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12694         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12695         length = btrfs_chunk_length(eb, chunk);
12696         chunk_end = chunk_key.offset + length;
12697         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12698                                       chunk_key.offset);
12699         if (ret < 0) {
12700                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12701                         chunk_end);
12702                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12703                 goto out;
12704         }
12705         type = btrfs_chunk_type(eb, chunk);
12706
12707         bg_key.objectid = chunk_key.offset;
12708         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12709         bg_key.offset = length;
12710
12711         btrfs_init_path(&path);
12712         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12713         if (ret) {
12714                 error(
12715                 "chunk[%llu %llu) did not find the related block group item",
12716                         chunk_key.offset, chunk_end);
12717                 err |= REFERENCER_MISSING;
12718         } else{
12719                 leaf = path.nodes[0];
12720                 bi = btrfs_item_ptr(leaf, path.slots[0],
12721                                     struct btrfs_block_group_item);
12722                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12723                                    sizeof(bg_item));
12724                 if (btrfs_block_group_flags(&bg_item) != type) {
12725                         error(
12726 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12727                                 chunk_key.offset, chunk_end, type,
12728                                 btrfs_block_group_flags(&bg_item));
12729                         err |= REFERENCER_MISSING;
12730                 }
12731         }
12732
12733         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12734         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12735         for (i = 0; i < num_stripes; i++) {
12736                 btrfs_release_path(&path);
12737                 btrfs_init_path(&path);
12738                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12739                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12740                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12741
12742                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12743                                         0, 0);
12744                 if (ret)
12745                         goto not_match_dev;
12746
12747                 leaf = path.nodes[0];
12748                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12749                                      struct btrfs_dev_extent);
12750                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12751                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12752                 if (objectid != chunk_key.objectid ||
12753                     offset != chunk_key.offset ||
12754                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12755                         goto not_match_dev;
12756                 continue;
12757 not_match_dev:
12758                 err |= BACKREF_MISSING;
12759                 error(
12760                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12761                         chunk_key.objectid, chunk_end, i);
12762                 continue;
12763         }
12764         btrfs_release_path(&path);
12765 out:
12766         return err;
12767 }
12768
12769 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12770                                    struct btrfs_root *root,
12771                                    struct btrfs_path *path)
12772 {
12773         struct btrfs_key key;
12774         int ret = 0;
12775
12776         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12777         btrfs_release_path(path);
12778         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12779         if (ret) {
12780                 ret = -ENOENT;
12781                 goto out;
12782         }
12783
12784         ret = btrfs_del_item(trans, root, path);
12785         if (ret)
12786                 goto out;
12787
12788         if (path->slots[0] == 0)
12789                 btrfs_prev_leaf(root, path);
12790         else
12791                 path->slots[0]--;
12792 out:
12793         if (ret)
12794                 error("failed to delete root %llu item[%llu, %u, %llu]",
12795                       root->objectid, key.objectid, key.type, key.offset);
12796         else
12797                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12798                        root->objectid, key.objectid, key.type, key.offset);
12799         return ret;
12800 }
12801
12802 /*
12803  * Main entry function to check known items and update related accounting info
12804  */
12805 static int check_leaf_items(struct btrfs_trans_handle *trans,
12806                             struct btrfs_root *root, struct btrfs_path *path,
12807                             struct node_refs *nrefs, int account_bytes)
12808 {
12809         struct btrfs_fs_info *fs_info = root->fs_info;
12810         struct btrfs_key key;
12811         struct extent_buffer *eb;
12812         int slot;
12813         int type;
12814         struct btrfs_extent_data_ref *dref;
12815         int ret = 0;
12816         int err = 0;
12817
12818 again:
12819         eb = path->nodes[0];
12820         slot = path->slots[0];
12821         if (slot >= btrfs_header_nritems(eb)) {
12822                 if (slot == 0) {
12823                         error("empty leaf [%llu %u] root %llu", eb->start,
12824                                 root->fs_info->nodesize, root->objectid);
12825                         err |= EIO;
12826                 }
12827                 goto out;
12828         }
12829
12830         btrfs_item_key_to_cpu(eb, &key, slot);
12831         type = key.type;
12832
12833         switch (type) {
12834         case BTRFS_EXTENT_DATA_KEY:
12835                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12836                 if (repair && ret)
12837                         ret = repair_extent_data_item(trans, root, path, nrefs,
12838                                                       ret);
12839                 err |= ret;
12840                 break;
12841         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12842                 ret = check_block_group_item(fs_info, eb, slot);
12843                 if (repair &&
12844                     ret & REFERENCER_MISSING)
12845                         ret = delete_extent_tree_item(trans, root, path);
12846                 err |= ret;
12847                 break;
12848         case BTRFS_DEV_ITEM_KEY:
12849                 ret = check_dev_item(fs_info, eb, slot);
12850                 err |= ret;
12851                 break;
12852         case BTRFS_CHUNK_ITEM_KEY:
12853                 ret = check_chunk_item(fs_info, eb, slot);
12854                 if (repair && ret)
12855                         ret = repair_chunk_item(trans, root, path, ret);
12856                 err |= ret;
12857                 break;
12858         case BTRFS_DEV_EXTENT_KEY:
12859                 ret = check_dev_extent_item(fs_info, eb, slot);
12860                 err |= ret;
12861                 break;
12862         case BTRFS_EXTENT_ITEM_KEY:
12863         case BTRFS_METADATA_ITEM_KEY:
12864                 ret = check_extent_item(trans, fs_info, path);
12865                 err |= ret;
12866                 break;
12867         case BTRFS_EXTENT_CSUM_KEY:
12868                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12869                 err |= ret;
12870                 break;
12871         case BTRFS_TREE_BLOCK_REF_KEY:
12872                 ret = check_tree_block_backref(fs_info, key.offset,
12873                                                key.objectid, -1);
12874                 if (repair &&
12875                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12876                         ret = delete_extent_tree_item(trans, root, path);
12877                 err |= ret;
12878                 break;
12879         case BTRFS_EXTENT_DATA_REF_KEY:
12880                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12881                 ret = check_extent_data_backref(fs_info,
12882                                 btrfs_extent_data_ref_root(eb, dref),
12883                                 btrfs_extent_data_ref_objectid(eb, dref),
12884                                 btrfs_extent_data_ref_offset(eb, dref),
12885                                 key.objectid, 0,
12886                                 btrfs_extent_data_ref_count(eb, dref));
12887                 if (repair &&
12888                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12889                         ret = delete_extent_tree_item(trans, root, path);
12890                 err |= ret;
12891                 break;
12892         case BTRFS_SHARED_BLOCK_REF_KEY:
12893                 ret = check_shared_block_backref(fs_info, key.offset,
12894                                                  key.objectid, -1);
12895                 if (repair &&
12896                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12897                         ret = delete_extent_tree_item(trans, root, path);
12898                 err |= ret;
12899                 break;
12900         case BTRFS_SHARED_DATA_REF_KEY:
12901                 ret = check_shared_data_backref(fs_info, key.offset,
12902                                                 key.objectid);
12903                 if (repair &&
12904                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12905                         ret = delete_extent_tree_item(trans, root, path);
12906                 err |= ret;
12907                 break;
12908         default:
12909                 break;
12910         }
12911
12912         ++path->slots[0];
12913         goto again;
12914 out:
12915         return err;
12916 }
12917
12918 /*
12919  * Low memory usage version check_chunks_and_extents.
12920  */
12921 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
12922 {
12923         struct btrfs_trans_handle *trans = NULL;
12924         struct btrfs_path path;
12925         struct btrfs_key old_key;
12926         struct btrfs_key key;
12927         struct btrfs_root *root1;
12928         struct btrfs_root *root;
12929         struct btrfs_root *cur_root;
12930         int err = 0;
12931         int ret;
12932
12933         root = fs_info->fs_root;
12934
12935         if (repair) {
12936                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
12937                 if (IS_ERR(trans)) {
12938                         error("failed to start transaction before check");
12939                         return PTR_ERR(trans);
12940                 }
12941         }
12942
12943         root1 = root->fs_info->chunk_root;
12944         ret = check_btrfs_root(trans, root1, 0, 1);
12945         err |= ret;
12946
12947         root1 = root->fs_info->tree_root;
12948         ret = check_btrfs_root(trans, root1, 0, 1);
12949         err |= ret;
12950
12951         btrfs_init_path(&path);
12952         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
12953         key.offset = 0;
12954         key.type = BTRFS_ROOT_ITEM_KEY;
12955
12956         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
12957         if (ret) {
12958                 error("cannot find extent tree in tree_root");
12959                 goto out;
12960         }
12961
12962         while (1) {
12963                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12964                 if (key.type != BTRFS_ROOT_ITEM_KEY)
12965                         goto next;
12966                 old_key = key;
12967                 key.offset = (u64)-1;
12968
12969                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12970                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
12971                                         &key);
12972                 else
12973                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
12974                 if (IS_ERR(cur_root) || !cur_root) {
12975                         error("failed to read tree: %lld", key.objectid);
12976                         goto next;
12977                 }
12978
12979                 ret = check_btrfs_root(trans, cur_root, 0, 1);
12980                 err |= ret;
12981
12982                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12983                         btrfs_free_fs_root(cur_root);
12984
12985                 btrfs_release_path(&path);
12986                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
12987                                         &old_key, &path, 0, 0);
12988                 if (ret)
12989                         goto out;
12990 next:
12991                 ret = btrfs_next_item(root1, &path);
12992                 if (ret)
12993                         goto out;
12994         }
12995 out:
12996
12997         /* if repair, update block accounting */
12998         if (repair) {
12999                 ret = btrfs_fix_block_accounting(trans, root);
13000                 if (ret)
13001                         err |= ret;
13002                 else
13003                         err &= ~BG_ACCOUNTING_ERROR;
13004         }
13005
13006         if (trans)
13007                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13008
13009         btrfs_release_path(&path);
13010
13011         return err;
13012 }
13013
13014 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13015 {
13016         int ret;
13017
13018         if (!ctx.progress_enabled)
13019                 fprintf(stderr, "checking extents\n");
13020         if (check_mode == CHECK_MODE_LOWMEM)
13021                 ret = check_chunks_and_extents_v2(fs_info);
13022         else
13023                 ret = check_chunks_and_extents(fs_info);
13024
13025         /* Also repair device size related problems */
13026         if (repair && !ret) {
13027                 ret = btrfs_fix_device_and_super_size(fs_info);
13028                 if (ret > 0)
13029                         ret = 0;
13030         }
13031         return ret;
13032 }
13033
13034 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13035                            struct btrfs_root *root, int overwrite)
13036 {
13037         struct extent_buffer *c;
13038         struct extent_buffer *old = root->node;
13039         int level;
13040         int ret;
13041         struct btrfs_disk_key disk_key = {0,0,0};
13042
13043         level = 0;
13044
13045         if (overwrite) {
13046                 c = old;
13047                 extent_buffer_get(c);
13048                 goto init;
13049         }
13050         c = btrfs_alloc_free_block(trans, root,
13051                                    root->fs_info->nodesize,
13052                                    root->root_key.objectid,
13053                                    &disk_key, level, 0, 0);
13054         if (IS_ERR(c)) {
13055                 c = old;
13056                 extent_buffer_get(c);
13057                 overwrite = 1;
13058         }
13059 init:
13060         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13061         btrfs_set_header_level(c, level);
13062         btrfs_set_header_bytenr(c, c->start);
13063         btrfs_set_header_generation(c, trans->transid);
13064         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13065         btrfs_set_header_owner(c, root->root_key.objectid);
13066
13067         write_extent_buffer(c, root->fs_info->fsid,
13068                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13069
13070         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13071                             btrfs_header_chunk_tree_uuid(c),
13072                             BTRFS_UUID_SIZE);
13073
13074         btrfs_mark_buffer_dirty(c);
13075         /*
13076          * this case can happen in the following case:
13077          *
13078          * 1.overwrite previous root.
13079          *
13080          * 2.reinit reloc data root, this is because we skip pin
13081          * down reloc data tree before which means we can allocate
13082          * same block bytenr here.
13083          */
13084         if (old->start == c->start) {
13085                 btrfs_set_root_generation(&root->root_item,
13086                                           trans->transid);
13087                 root->root_item.level = btrfs_header_level(root->node);
13088                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13089                                         &root->root_key, &root->root_item);
13090                 if (ret) {
13091                         free_extent_buffer(c);
13092                         return ret;
13093                 }
13094         }
13095         free_extent_buffer(old);
13096         root->node = c;
13097         add_root_to_dirty_list(root);
13098         return 0;
13099 }
13100
13101 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13102                                 struct extent_buffer *eb, int tree_root)
13103 {
13104         struct extent_buffer *tmp;
13105         struct btrfs_root_item *ri;
13106         struct btrfs_key key;
13107         u64 bytenr;
13108         int level = btrfs_header_level(eb);
13109         int nritems;
13110         int ret;
13111         int i;
13112
13113         /*
13114          * If we have pinned this block before, don't pin it again.
13115          * This can not only avoid forever loop with broken filesystem
13116          * but also give us some speedups.
13117          */
13118         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13119                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13120                 return 0;
13121
13122         btrfs_pin_extent(fs_info, eb->start, eb->len);
13123
13124         nritems = btrfs_header_nritems(eb);
13125         for (i = 0; i < nritems; i++) {
13126                 if (level == 0) {
13127                         btrfs_item_key_to_cpu(eb, &key, i);
13128                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13129                                 continue;
13130                         /* Skip the extent root and reloc roots */
13131                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13132                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13133                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13134                                 continue;
13135                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13136                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13137
13138                         /*
13139                          * If at any point we start needing the real root we
13140                          * will have to build a stump root for the root we are
13141                          * in, but for now this doesn't actually use the root so
13142                          * just pass in extent_root.
13143                          */
13144                         tmp = read_tree_block(fs_info, bytenr, 0);
13145                         if (!extent_buffer_uptodate(tmp)) {
13146                                 fprintf(stderr, "Error reading root block\n");
13147                                 return -EIO;
13148                         }
13149                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13150                         free_extent_buffer(tmp);
13151                         if (ret)
13152                                 return ret;
13153                 } else {
13154                         bytenr = btrfs_node_blockptr(eb, i);
13155
13156                         /* If we aren't the tree root don't read the block */
13157                         if (level == 1 && !tree_root) {
13158                                 btrfs_pin_extent(fs_info, bytenr,
13159                                                 fs_info->nodesize);
13160                                 continue;
13161                         }
13162
13163                         tmp = read_tree_block(fs_info, bytenr, 0);
13164                         if (!extent_buffer_uptodate(tmp)) {
13165                                 fprintf(stderr, "Error reading tree block\n");
13166                                 return -EIO;
13167                         }
13168                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13169                         free_extent_buffer(tmp);
13170                         if (ret)
13171                                 return ret;
13172                 }
13173         }
13174
13175         return 0;
13176 }
13177
13178 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13179 {
13180         int ret;
13181
13182         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13183         if (ret)
13184                 return ret;
13185
13186         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13187 }
13188
13189 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13190 {
13191         struct btrfs_block_group_cache *cache;
13192         struct btrfs_path path;
13193         struct extent_buffer *leaf;
13194         struct btrfs_chunk *chunk;
13195         struct btrfs_key key;
13196         int ret;
13197         u64 start;
13198
13199         btrfs_init_path(&path);
13200         key.objectid = 0;
13201         key.type = BTRFS_CHUNK_ITEM_KEY;
13202         key.offset = 0;
13203         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13204         if (ret < 0) {
13205                 btrfs_release_path(&path);
13206                 return ret;
13207         }
13208
13209         /*
13210          * We do this in case the block groups were screwed up and had alloc
13211          * bits that aren't actually set on the chunks.  This happens with
13212          * restored images every time and could happen in real life I guess.
13213          */
13214         fs_info->avail_data_alloc_bits = 0;
13215         fs_info->avail_metadata_alloc_bits = 0;
13216         fs_info->avail_system_alloc_bits = 0;
13217
13218         /* First we need to create the in-memory block groups */
13219         while (1) {
13220                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13221                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13222                         if (ret < 0) {
13223                                 btrfs_release_path(&path);
13224                                 return ret;
13225                         }
13226                         if (ret) {
13227                                 ret = 0;
13228                                 break;
13229                         }
13230                 }
13231                 leaf = path.nodes[0];
13232                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13233                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13234                         path.slots[0]++;
13235                         continue;
13236                 }
13237
13238                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13239                 btrfs_add_block_group(fs_info, 0,
13240                                       btrfs_chunk_type(leaf, chunk), key.offset,
13241                                       btrfs_chunk_length(leaf, chunk));
13242                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13243                                  key.offset + btrfs_chunk_length(leaf, chunk));
13244                 path.slots[0]++;
13245         }
13246         start = 0;
13247         while (1) {
13248                 cache = btrfs_lookup_first_block_group(fs_info, start);
13249                 if (!cache)
13250                         break;
13251                 cache->cached = 1;
13252                 start = cache->key.objectid + cache->key.offset;
13253         }
13254
13255         btrfs_release_path(&path);
13256         return 0;
13257 }
13258
13259 static int reset_balance(struct btrfs_trans_handle *trans,
13260                          struct btrfs_fs_info *fs_info)
13261 {
13262         struct btrfs_root *root = fs_info->tree_root;
13263         struct btrfs_path path;
13264         struct extent_buffer *leaf;
13265         struct btrfs_key key;
13266         int del_slot, del_nr = 0;
13267         int ret;
13268         int found = 0;
13269
13270         btrfs_init_path(&path);
13271         key.objectid = BTRFS_BALANCE_OBJECTID;
13272         key.type = BTRFS_BALANCE_ITEM_KEY;
13273         key.offset = 0;
13274         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13275         if (ret) {
13276                 if (ret > 0)
13277                         ret = 0;
13278                 if (!ret)
13279                         goto reinit_data_reloc;
13280                 else
13281                         goto out;
13282         }
13283
13284         ret = btrfs_del_item(trans, root, &path);
13285         if (ret)
13286                 goto out;
13287         btrfs_release_path(&path);
13288
13289         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13290         key.type = BTRFS_ROOT_ITEM_KEY;
13291         key.offset = 0;
13292         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13293         if (ret < 0)
13294                 goto out;
13295         while (1) {
13296                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13297                         if (!found)
13298                                 break;
13299
13300                         if (del_nr) {
13301                                 ret = btrfs_del_items(trans, root, &path,
13302                                                       del_slot, del_nr);
13303                                 del_nr = 0;
13304                                 if (ret)
13305                                         goto out;
13306                         }
13307                         key.offset++;
13308                         btrfs_release_path(&path);
13309
13310                         found = 0;
13311                         ret = btrfs_search_slot(trans, root, &key, &path,
13312                                                 -1, 1);
13313                         if (ret < 0)
13314                                 goto out;
13315                         continue;
13316                 }
13317                 found = 1;
13318                 leaf = path.nodes[0];
13319                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13320                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13321                         break;
13322                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13323                         path.slots[0]++;
13324                         continue;
13325                 }
13326                 if (!del_nr) {
13327                         del_slot = path.slots[0];
13328                         del_nr = 1;
13329                 } else {
13330                         del_nr++;
13331                 }
13332                 path.slots[0]++;
13333         }
13334
13335         if (del_nr) {
13336                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13337                 if (ret)
13338                         goto out;
13339         }
13340         btrfs_release_path(&path);
13341
13342 reinit_data_reloc:
13343         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13344         key.type = BTRFS_ROOT_ITEM_KEY;
13345         key.offset = (u64)-1;
13346         root = btrfs_read_fs_root(fs_info, &key);
13347         if (IS_ERR(root)) {
13348                 fprintf(stderr, "Error reading data reloc tree\n");
13349                 ret = PTR_ERR(root);
13350                 goto out;
13351         }
13352         record_root_in_trans(trans, root);
13353         ret = btrfs_fsck_reinit_root(trans, root, 0);
13354         if (ret)
13355                 goto out;
13356         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13357 out:
13358         btrfs_release_path(&path);
13359         return ret;
13360 }
13361
13362 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13363                               struct btrfs_fs_info *fs_info)
13364 {
13365         u64 start = 0;
13366         int ret;
13367
13368         /*
13369          * The only reason we don't do this is because right now we're just
13370          * walking the trees we find and pinning down their bytes, we don't look
13371          * at any of the leaves.  In order to do mixed groups we'd have to check
13372          * the leaves of any fs roots and pin down the bytes for any file
13373          * extents we find.  Not hard but why do it if we don't have to?
13374          */
13375         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13376                 fprintf(stderr, "We don't support re-initing the extent tree "
13377                         "for mixed block groups yet, please notify a btrfs "
13378                         "developer you want to do this so they can add this "
13379                         "functionality.\n");
13380                 return -EINVAL;
13381         }
13382
13383         /*
13384          * first we need to walk all of the trees except the extent tree and pin
13385          * down the bytes that are in use so we don't overwrite any existing
13386          * metadata.
13387          */
13388         ret = pin_metadata_blocks(fs_info);
13389         if (ret) {
13390                 fprintf(stderr, "error pinning down used bytes\n");
13391                 return ret;
13392         }
13393
13394         /*
13395          * Need to drop all the block groups since we're going to recreate all
13396          * of them again.
13397          */
13398         btrfs_free_block_groups(fs_info);
13399         ret = reset_block_groups(fs_info);
13400         if (ret) {
13401                 fprintf(stderr, "error resetting the block groups\n");
13402                 return ret;
13403         }
13404
13405         /* Ok we can allocate now, reinit the extent root */
13406         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13407         if (ret) {
13408                 fprintf(stderr, "extent root initialization failed\n");
13409                 /*
13410                  * When the transaction code is updated we should end the
13411                  * transaction, but for now progs only knows about commit so
13412                  * just return an error.
13413                  */
13414                 return ret;
13415         }
13416
13417         /*
13418          * Now we have all the in-memory block groups setup so we can make
13419          * allocations properly, and the metadata we care about is safe since we
13420          * pinned all of it above.
13421          */
13422         while (1) {
13423                 struct btrfs_block_group_cache *cache;
13424
13425                 cache = btrfs_lookup_first_block_group(fs_info, start);
13426                 if (!cache)
13427                         break;
13428                 start = cache->key.objectid + cache->key.offset;
13429                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13430                                         &cache->key, &cache->item,
13431                                         sizeof(cache->item));
13432                 if (ret) {
13433                         fprintf(stderr, "Error adding block group\n");
13434                         return ret;
13435                 }
13436                 btrfs_extent_post_op(trans, fs_info->extent_root);
13437         }
13438
13439         ret = reset_balance(trans, fs_info);
13440         if (ret)
13441                 fprintf(stderr, "error resetting the pending balance\n");
13442
13443         return ret;
13444 }
13445
13446 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13447 {
13448         struct btrfs_path path;
13449         struct btrfs_trans_handle *trans;
13450         struct btrfs_key key;
13451         int ret;
13452
13453         printf("Recowing metadata block %llu\n", eb->start);
13454         key.objectid = btrfs_header_owner(eb);
13455         key.type = BTRFS_ROOT_ITEM_KEY;
13456         key.offset = (u64)-1;
13457
13458         root = btrfs_read_fs_root(root->fs_info, &key);
13459         if (IS_ERR(root)) {
13460                 fprintf(stderr, "Couldn't find owner root %llu\n",
13461                         key.objectid);
13462                 return PTR_ERR(root);
13463         }
13464
13465         trans = btrfs_start_transaction(root, 1);
13466         if (IS_ERR(trans))
13467                 return PTR_ERR(trans);
13468
13469         btrfs_init_path(&path);
13470         path.lowest_level = btrfs_header_level(eb);
13471         if (path.lowest_level)
13472                 btrfs_node_key_to_cpu(eb, &key, 0);
13473         else
13474                 btrfs_item_key_to_cpu(eb, &key, 0);
13475
13476         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13477         btrfs_commit_transaction(trans, root);
13478         btrfs_release_path(&path);
13479         return ret;
13480 }
13481
13482 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13483 {
13484         struct btrfs_path path;
13485         struct btrfs_trans_handle *trans;
13486         struct btrfs_key key;
13487         int ret;
13488
13489         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13490                bad->key.type, bad->key.offset);
13491         key.objectid = bad->root_id;
13492         key.type = BTRFS_ROOT_ITEM_KEY;
13493         key.offset = (u64)-1;
13494
13495         root = btrfs_read_fs_root(root->fs_info, &key);
13496         if (IS_ERR(root)) {
13497                 fprintf(stderr, "Couldn't find owner root %llu\n",
13498                         key.objectid);
13499                 return PTR_ERR(root);
13500         }
13501
13502         trans = btrfs_start_transaction(root, 1);
13503         if (IS_ERR(trans))
13504                 return PTR_ERR(trans);
13505
13506         btrfs_init_path(&path);
13507         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13508         if (ret) {
13509                 if (ret > 0)
13510                         ret = 0;
13511                 goto out;
13512         }
13513         ret = btrfs_del_item(trans, root, &path);
13514 out:
13515         btrfs_commit_transaction(trans, root);
13516         btrfs_release_path(&path);
13517         return ret;
13518 }
13519
13520 static int zero_log_tree(struct btrfs_root *root)
13521 {
13522         struct btrfs_trans_handle *trans;
13523         int ret;
13524
13525         trans = btrfs_start_transaction(root, 1);
13526         if (IS_ERR(trans)) {
13527                 ret = PTR_ERR(trans);
13528                 return ret;
13529         }
13530         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13531         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13532         ret = btrfs_commit_transaction(trans, root);
13533         return ret;
13534 }
13535
13536 static int populate_csum(struct btrfs_trans_handle *trans,
13537                          struct btrfs_root *csum_root, char *buf, u64 start,
13538                          u64 len)
13539 {
13540         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13541         u64 offset = 0;
13542         u64 sectorsize;
13543         int ret = 0;
13544
13545         while (offset < len) {
13546                 sectorsize = fs_info->sectorsize;
13547                 ret = read_extent_data(fs_info, buf, start + offset,
13548                                        &sectorsize, 0);
13549                 if (ret)
13550                         break;
13551                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13552                                             start + offset, buf, sectorsize);
13553                 if (ret)
13554                         break;
13555                 offset += sectorsize;
13556         }
13557         return ret;
13558 }
13559
13560 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13561                                       struct btrfs_root *csum_root,
13562                                       struct btrfs_root *cur_root)
13563 {
13564         struct btrfs_path path;
13565         struct btrfs_key key;
13566         struct extent_buffer *node;
13567         struct btrfs_file_extent_item *fi;
13568         char *buf = NULL;
13569         u64 start = 0;
13570         u64 len = 0;
13571         int slot = 0;
13572         int ret = 0;
13573
13574         buf = malloc(cur_root->fs_info->sectorsize);
13575         if (!buf)
13576                 return -ENOMEM;
13577
13578         btrfs_init_path(&path);
13579         key.objectid = 0;
13580         key.offset = 0;
13581         key.type = 0;
13582         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13583         if (ret < 0)
13584                 goto out;
13585         /* Iterate all regular file extents and fill its csum */
13586         while (1) {
13587                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13588
13589                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13590                         goto next;
13591                 node = path.nodes[0];
13592                 slot = path.slots[0];
13593                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13594                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13595                         goto next;
13596                 start = btrfs_file_extent_disk_bytenr(node, fi);
13597                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13598
13599                 ret = populate_csum(trans, csum_root, buf, start, len);
13600                 if (ret == -EEXIST)
13601                         ret = 0;
13602                 if (ret < 0)
13603                         goto out;
13604 next:
13605                 /*
13606                  * TODO: if next leaf is corrupted, jump to nearest next valid
13607                  * leaf.
13608                  */
13609                 ret = btrfs_next_item(cur_root, &path);
13610                 if (ret < 0)
13611                         goto out;
13612                 if (ret > 0) {
13613                         ret = 0;
13614                         goto out;
13615                 }
13616         }
13617
13618 out:
13619         btrfs_release_path(&path);
13620         free(buf);
13621         return ret;
13622 }
13623
13624 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13625                                   struct btrfs_root *csum_root)
13626 {
13627         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13628         struct btrfs_path path;
13629         struct btrfs_root *tree_root = fs_info->tree_root;
13630         struct btrfs_root *cur_root;
13631         struct extent_buffer *node;
13632         struct btrfs_key key;
13633         int slot = 0;
13634         int ret = 0;
13635
13636         btrfs_init_path(&path);
13637         key.objectid = BTRFS_FS_TREE_OBJECTID;
13638         key.offset = 0;
13639         key.type = BTRFS_ROOT_ITEM_KEY;
13640         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13641         if (ret < 0)
13642                 goto out;
13643         if (ret > 0) {
13644                 ret = -ENOENT;
13645                 goto out;
13646         }
13647
13648         while (1) {
13649                 node = path.nodes[0];
13650                 slot = path.slots[0];
13651                 btrfs_item_key_to_cpu(node, &key, slot);
13652                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13653                         goto out;
13654                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13655                         goto next;
13656                 if (!is_fstree(key.objectid))
13657                         goto next;
13658                 key.offset = (u64)-1;
13659
13660                 cur_root = btrfs_read_fs_root(fs_info, &key);
13661                 if (IS_ERR(cur_root) || !cur_root) {
13662                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13663                                 key.objectid);
13664                         goto out;
13665                 }
13666                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13667                                 cur_root);
13668                 if (ret < 0)
13669                         goto out;
13670 next:
13671                 ret = btrfs_next_item(tree_root, &path);
13672                 if (ret > 0) {
13673                         ret = 0;
13674                         goto out;
13675                 }
13676                 if (ret < 0)
13677                         goto out;
13678         }
13679
13680 out:
13681         btrfs_release_path(&path);
13682         return ret;
13683 }
13684
13685 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13686                                       struct btrfs_root *csum_root)
13687 {
13688         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13689         struct btrfs_path path;
13690         struct btrfs_extent_item *ei;
13691         struct extent_buffer *leaf;
13692         char *buf;
13693         struct btrfs_key key;
13694         int ret;
13695
13696         btrfs_init_path(&path);
13697         key.objectid = 0;
13698         key.type = BTRFS_EXTENT_ITEM_KEY;
13699         key.offset = 0;
13700         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13701         if (ret < 0) {
13702                 btrfs_release_path(&path);
13703                 return ret;
13704         }
13705
13706         buf = malloc(csum_root->fs_info->sectorsize);
13707         if (!buf) {
13708                 btrfs_release_path(&path);
13709                 return -ENOMEM;
13710         }
13711
13712         while (1) {
13713                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13714                         ret = btrfs_next_leaf(extent_root, &path);
13715                         if (ret < 0)
13716                                 break;
13717                         if (ret) {
13718                                 ret = 0;
13719                                 break;
13720                         }
13721                 }
13722                 leaf = path.nodes[0];
13723
13724                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13725                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13726                         path.slots[0]++;
13727                         continue;
13728                 }
13729
13730                 ei = btrfs_item_ptr(leaf, path.slots[0],
13731                                     struct btrfs_extent_item);
13732                 if (!(btrfs_extent_flags(leaf, ei) &
13733                       BTRFS_EXTENT_FLAG_DATA)) {
13734                         path.slots[0]++;
13735                         continue;
13736                 }
13737
13738                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13739                                     key.offset);
13740                 if (ret)
13741                         break;
13742                 path.slots[0]++;
13743         }
13744
13745         btrfs_release_path(&path);
13746         free(buf);
13747         return ret;
13748 }
13749
/*
 * Recalculate the data checksums and store them in the csum tree.
 *
 * Initializing the extent tree wipes out all extent info, so in that case the
 * extent tree cannot be used as the source and the fs/subvolume trees are
 * scanned instead.  A non-zero @search_fs_tree selects the fs/subvolume tree
 * scan; zero selects the extent tree scan.
 *
 * Returns whatever the selected fill function returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
13766
13767 static void free_roots_info_cache(void)
13768 {
13769         if (!roots_info_cache)
13770                 return;
13771
13772         while (!cache_tree_empty(roots_info_cache)) {
13773                 struct cache_extent *entry;
13774                 struct root_item_info *rii;
13775
13776                 entry = first_cache_extent(roots_info_cache);
13777                 if (!entry)
13778                         break;
13779                 remove_cache_extent(roots_info_cache, entry);
13780                 rii = container_of(entry, struct root_item_info, cache_extent);
13781                 free(rii);
13782         }
13783
13784         free(roots_info_cache);
13785         roots_info_cache = NULL;
13786 }
13787
13788 static int build_roots_info_cache(struct btrfs_fs_info *info)
13789 {
13790         int ret = 0;
13791         struct btrfs_key key;
13792         struct extent_buffer *leaf;
13793         struct btrfs_path path;
13794
13795         if (!roots_info_cache) {
13796                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13797                 if (!roots_info_cache)
13798                         return -ENOMEM;
13799                 cache_tree_init(roots_info_cache);
13800         }
13801
13802         btrfs_init_path(&path);
13803         key.objectid = 0;
13804         key.type = BTRFS_EXTENT_ITEM_KEY;
13805         key.offset = 0;
13806         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13807         if (ret < 0)
13808                 goto out;
13809         leaf = path.nodes[0];
13810
13811         while (1) {
13812                 struct btrfs_key found_key;
13813                 struct btrfs_extent_item *ei;
13814                 struct btrfs_extent_inline_ref *iref;
13815                 int slot = path.slots[0];
13816                 int type;
13817                 u64 flags;
13818                 u64 root_id;
13819                 u8 level;
13820                 struct cache_extent *entry;
13821                 struct root_item_info *rii;
13822
13823                 if (slot >= btrfs_header_nritems(leaf)) {
13824                         ret = btrfs_next_leaf(info->extent_root, &path);
13825                         if (ret < 0) {
13826                                 break;
13827                         } else if (ret) {
13828                                 ret = 0;
13829                                 break;
13830                         }
13831                         leaf = path.nodes[0];
13832                         slot = path.slots[0];
13833                 }
13834
13835                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13836
13837                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13838                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13839                         goto next;
13840
13841                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13842                 flags = btrfs_extent_flags(leaf, ei);
13843
13844                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13845                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13846                         goto next;
13847
13848                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13849                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13850                         level = found_key.offset;
13851                 } else {
13852                         struct btrfs_tree_block_info *binfo;
13853
13854                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13855                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13856                         level = btrfs_tree_block_level(leaf, binfo);
13857                 }
13858
13859                 /*
13860                  * For a root extent, it must be of the following type and the
13861                  * first (and only one) iref in the item.
13862                  */
13863                 type = btrfs_extent_inline_ref_type(leaf, iref);
13864                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13865                         goto next;
13866
13867                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13868                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13869                 if (!entry) {
13870                         rii = malloc(sizeof(struct root_item_info));
13871                         if (!rii) {
13872                                 ret = -ENOMEM;
13873                                 goto out;
13874                         }
13875                         rii->cache_extent.start = root_id;
13876                         rii->cache_extent.size = 1;
13877                         rii->level = (u8)-1;
13878                         entry = &rii->cache_extent;
13879                         ret = insert_cache_extent(roots_info_cache, entry);
13880                         ASSERT(ret == 0);
13881                 } else {
13882                         rii = container_of(entry, struct root_item_info,
13883                                            cache_extent);
13884                 }
13885
13886                 ASSERT(rii->cache_extent.start == root_id);
13887                 ASSERT(rii->cache_extent.size == 1);
13888
13889                 if (level > rii->level || rii->level == (u8)-1) {
13890                         rii->level = level;
13891                         rii->bytenr = found_key.objectid;
13892                         rii->gen = btrfs_extent_generation(leaf, ei);
13893                         rii->node_count = 1;
13894                 } else if (level == rii->level) {
13895                         rii->node_count++;
13896                 }
13897 next:
13898                 path.slots[0]++;
13899         }
13900
13901 out:
13902         btrfs_release_path(&path);
13903
13904         return ret;
13905 }
13906
13907 static int maybe_repair_root_item(struct btrfs_path *path,
13908                                   const struct btrfs_key *root_key,
13909                                   const int read_only_mode)
13910 {
13911         const u64 root_id = root_key->objectid;
13912         struct cache_extent *entry;
13913         struct root_item_info *rii;
13914         struct btrfs_root_item ri;
13915         unsigned long offset;
13916
13917         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13918         if (!entry) {
13919                 fprintf(stderr,
13920                         "Error: could not find extent items for root %llu\n",
13921                         root_key->objectid);
13922                 return -ENOENT;
13923         }
13924
13925         rii = container_of(entry, struct root_item_info, cache_extent);
13926         ASSERT(rii->cache_extent.start == root_id);
13927         ASSERT(rii->cache_extent.size == 1);
13928
13929         if (rii->node_count != 1) {
13930                 fprintf(stderr,
13931                         "Error: could not find btree root extent for root %llu\n",
13932                         root_id);
13933                 return -ENOENT;
13934         }
13935
13936         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
13937         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
13938
13939         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
13940             btrfs_root_level(&ri) != rii->level ||
13941             btrfs_root_generation(&ri) != rii->gen) {
13942
13943                 /*
13944                  * If we're in repair mode but our caller told us to not update
13945                  * the root item, i.e. just check if it needs to be updated, don't
13946                  * print this message, since the caller will call us again shortly
13947                  * for the same root item without read only mode (the caller will
13948                  * open a transaction first).
13949                  */
13950                 if (!(read_only_mode && repair))
13951                         fprintf(stderr,
13952                                 "%sroot item for root %llu,"
13953                                 " current bytenr %llu, current gen %llu, current level %u,"
13954                                 " new bytenr %llu, new gen %llu, new level %u\n",
13955                                 (read_only_mode ? "" : "fixing "),
13956                                 root_id,
13957                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
13958                                 btrfs_root_level(&ri),
13959                                 rii->bytenr, rii->gen, rii->level);
13960
13961                 if (btrfs_root_generation(&ri) > rii->gen) {
13962                         fprintf(stderr,
13963                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
13964                                 root_id, btrfs_root_generation(&ri), rii->gen);
13965                         return -EINVAL;
13966                 }
13967
13968                 if (!read_only_mode) {
13969                         btrfs_set_root_bytenr(&ri, rii->bytenr);
13970                         btrfs_set_root_level(&ri, rii->level);
13971                         btrfs_set_root_generation(&ri, rii->gen);
13972                         write_extent_buffer(path->nodes[0], &ri,
13973                                             offset, sizeof(ri));
13974                 }
13975
13976                 return 1;
13977         }
13978
13979         return 0;
13980 }
13981
13982 /*
13983  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
13984  * caused read-only snapshots to be corrupted if they were created at a moment
13985  * when the source subvolume/snapshot had orphan items. The issue was that the
13986  * on-disk root items became incorrect, referring to the pre orphan cleanup root
13987  * node instead of the post orphan cleanup root node.
13988  * So this function, and its callees, just detects and fixes those cases. Even
13989  * though the regression was for read-only snapshots, this function applies to
13990  * any snapshot/subvolume root.
13991  * This must be run before any other repair code - not doing it so, makes other
13992  * repair code delete or modify backrefs in the extent tree for example, which
13993  * will result in an inconsistent fs after repairing the root items.
13994  */
13995 static int repair_root_items(struct btrfs_fs_info *info)
13996 {
13997         struct btrfs_path path;
13998         struct btrfs_key key;
13999         struct extent_buffer *leaf;
14000         struct btrfs_trans_handle *trans = NULL;
14001         int ret = 0;
14002         int bad_roots = 0;
14003         int need_trans = 0;
14004
14005         btrfs_init_path(&path);
14006
14007         ret = build_roots_info_cache(info);
14008         if (ret)
14009                 goto out;
14010
14011         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14012         key.type = BTRFS_ROOT_ITEM_KEY;
14013         key.offset = 0;
14014
14015 again:
14016         /*
14017          * Avoid opening and committing transactions if a leaf doesn't have
14018          * any root items that need to be fixed, so that we avoid rotating
14019          * backup roots unnecessarily.
14020          */
14021         if (need_trans) {
14022                 trans = btrfs_start_transaction(info->tree_root, 1);
14023                 if (IS_ERR(trans)) {
14024                         ret = PTR_ERR(trans);
14025                         goto out;
14026                 }
14027         }
14028
14029         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14030                                 0, trans ? 1 : 0);
14031         if (ret < 0)
14032                 goto out;
14033         leaf = path.nodes[0];
14034
14035         while (1) {
14036                 struct btrfs_key found_key;
14037
14038                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14039                         int no_more_keys = find_next_key(&path, &key);
14040
14041                         btrfs_release_path(&path);
14042                         if (trans) {
14043                                 ret = btrfs_commit_transaction(trans,
14044                                                                info->tree_root);
14045                                 trans = NULL;
14046                                 if (ret < 0)
14047                                         goto out;
14048                         }
14049                         need_trans = 0;
14050                         if (no_more_keys)
14051                                 break;
14052                         goto again;
14053                 }
14054
14055                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14056
14057                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14058                         goto next;
14059                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14060                         goto next;
14061
14062                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14063                 if (ret < 0)
14064                         goto out;
14065                 if (ret) {
14066                         if (!trans && repair) {
14067                                 need_trans = 1;
14068                                 key = found_key;
14069                                 btrfs_release_path(&path);
14070                                 goto again;
14071                         }
14072                         bad_roots++;
14073                 }
14074 next:
14075                 path.slots[0]++;
14076         }
14077         ret = 0;
14078 out:
14079         free_roots_info_cache();
14080         btrfs_release_path(&path);
14081         if (trans)
14082                 btrfs_commit_transaction(trans, info->tree_root);
14083         if (ret < 0)
14084                 return ret;
14085
14086         return bad_roots;
14087 }
14088
14089 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14090 {
14091         struct btrfs_trans_handle *trans;
14092         struct btrfs_block_group_cache *bg_cache;
14093         u64 current = 0;
14094         int ret = 0;
14095
14096         /* Clear all free space cache inodes and its extent data */
14097         while (1) {
14098                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14099                 if (!bg_cache)
14100                         break;
14101                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14102                 if (ret < 0)
14103                         return ret;
14104                 current = bg_cache->key.objectid + bg_cache->key.offset;
14105         }
14106
14107         /* Don't forget to set cache_generation to -1 */
14108         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14109         if (IS_ERR(trans)) {
14110                 error("failed to update super block cache generation");
14111                 return PTR_ERR(trans);
14112         }
14113         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14114         btrfs_commit_transaction(trans, fs_info->tree_root);
14115
14116         return ret;
14117 }
14118
14119 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14120                 int clear_version)
14121 {
14122         int ret = 0;
14123
14124         if (clear_version == 1) {
14125                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14126                         error(
14127                 "free space cache v2 detected, use --clear-space-cache v2");
14128                         ret = 1;
14129                         goto close_out;
14130                 }
14131                 printf("Clearing free space cache\n");
14132                 ret = clear_free_space_cache(fs_info);
14133                 if (ret) {
14134                         error("failed to clear free space cache");
14135                         ret = 1;
14136                 } else {
14137                         printf("Free space cache cleared\n");
14138                 }
14139         } else if (clear_version == 2) {
14140                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14141                         printf("no free space cache v2 to clear\n");
14142                         ret = 0;
14143                         goto close_out;
14144                 }
14145                 printf("Clear free space cache v2\n");
14146                 ret = btrfs_clear_free_space_tree(fs_info);
14147                 if (ret) {
14148                         error("failed to clear free space cache v2: %d", ret);
14149                         ret = 1;
14150                 } else {
14151                         printf("free space cache v2 cleared\n");
14152                 }
14153         }
14154 close_out:
14155         return ret;
14156 }
14157
/*
 * Help text for "btrfs check", as a NULL-terminated array of lines.  The
 * first entry is the synopsis, followed by the short and long descriptions
 * and then one or more lines per option.  cmd_check() passes this array to
 * usage() when option parsing yields '?' or 'h'.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        NULL
};
14189
14190 int cmd_check(int argc, char **argv)
14191 {
14192         struct cache_tree root_cache;
14193         struct btrfs_root *root;
14194         struct btrfs_fs_info *info;
14195         u64 bytenr = 0;
14196         u64 subvolid = 0;
14197         u64 tree_root_bytenr = 0;
14198         u64 chunk_root_bytenr = 0;
14199         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14200         int ret = 0;
14201         int err = 0;
14202         u64 num;
14203         int init_csum_tree = 0;
14204         int readonly = 0;
14205         int clear_space_cache = 0;
14206         int qgroup_report = 0;
14207         int qgroups_repaired = 0;
14208         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14209         int force = 0;
14210
14211         while(1) {
14212                 int c;
14213                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14214                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14215                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14216                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14217                         GETOPT_VAL_FORCE };
14218                 static const struct option long_options[] = {
14219                         { "super", required_argument, NULL, 's' },
14220                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14221                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14222                         { "init-csum-tree", no_argument, NULL,
14223                                 GETOPT_VAL_INIT_CSUM },
14224                         { "init-extent-tree", no_argument, NULL,
14225                                 GETOPT_VAL_INIT_EXTENT },
14226                         { "check-data-csum", no_argument, NULL,
14227                                 GETOPT_VAL_CHECK_CSUM },
14228                         { "backup", no_argument, NULL, 'b' },
14229                         { "subvol-extents", required_argument, NULL, 'E' },
14230                         { "qgroup-report", no_argument, NULL, 'Q' },
14231                         { "tree-root", required_argument, NULL, 'r' },
14232                         { "chunk-root", required_argument, NULL,
14233                                 GETOPT_VAL_CHUNK_TREE },
14234                         { "progress", no_argument, NULL, 'p' },
14235                         { "mode", required_argument, NULL,
14236                                 GETOPT_VAL_MODE },
14237                         { "clear-space-cache", required_argument, NULL,
14238                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14239                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14240                         { NULL, 0, NULL, 0}
14241                 };
14242
14243                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14244                 if (c < 0)
14245                         break;
14246                 switch(c) {
14247                         case 'a': /* ignored */ break;
14248                         case 'b':
14249                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14250                                 break;
14251                         case 's':
14252                                 num = arg_strtou64(optarg);
14253                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14254                                         error(
14255                                         "super mirror should be less than %d",
14256                                                 BTRFS_SUPER_MIRROR_MAX);
14257                                         exit(1);
14258                                 }
14259                                 bytenr = btrfs_sb_offset(((int)num));
14260                                 printf("using SB copy %llu, bytenr %llu\n", num,
14261                                        (unsigned long long)bytenr);
14262                                 break;
14263                         case 'Q':
14264                                 qgroup_report = 1;
14265                                 break;
14266                         case 'E':
14267                                 subvolid = arg_strtou64(optarg);
14268                                 break;
14269                         case 'r':
14270                                 tree_root_bytenr = arg_strtou64(optarg);
14271                                 break;
14272                         case GETOPT_VAL_CHUNK_TREE:
14273                                 chunk_root_bytenr = arg_strtou64(optarg);
14274                                 break;
14275                         case 'p':
14276                                 ctx.progress_enabled = true;
14277                                 break;
14278                         case '?':
14279                         case 'h':
14280                                 usage(cmd_check_usage);
14281                         case GETOPT_VAL_REPAIR:
14282                                 printf("enabling repair mode\n");
14283                                 repair = 1;
14284                                 ctree_flags |= OPEN_CTREE_WRITES;
14285                                 break;
14286                         case GETOPT_VAL_READONLY:
14287                                 readonly = 1;
14288                                 break;
14289                         case GETOPT_VAL_INIT_CSUM:
14290                                 printf("Creating a new CRC tree\n");
14291                                 init_csum_tree = 1;
14292                                 repair = 1;
14293                                 ctree_flags |= OPEN_CTREE_WRITES;
14294                                 break;
14295                         case GETOPT_VAL_INIT_EXTENT:
14296                                 init_extent_tree = 1;
14297                                 ctree_flags |= (OPEN_CTREE_WRITES |
14298                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14299                                 repair = 1;
14300                                 break;
14301                         case GETOPT_VAL_CHECK_CSUM:
14302                                 check_data_csum = 1;
14303                                 break;
14304                         case GETOPT_VAL_MODE:
14305                                 check_mode = parse_check_mode(optarg);
14306                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14307                                         error("unknown mode: %s", optarg);
14308                                         exit(1);
14309                                 }
14310                                 break;
14311                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14312                                 if (strcmp(optarg, "v1") == 0) {
14313                                         clear_space_cache = 1;
14314                                 } else if (strcmp(optarg, "v2") == 0) {
14315                                         clear_space_cache = 2;
14316                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14317                                 } else {
14318                                         error(
14319                 "invalid argument to --clear-space-cache, must be v1 or v2");
14320                                         exit(1);
14321                                 }
14322                                 ctree_flags |= OPEN_CTREE_WRITES;
14323                                 break;
14324                         case GETOPT_VAL_FORCE:
14325                                 force = 1;
14326                                 break;
14327                 }
14328         }
14329
14330         if (check_argc_exact(argc - optind, 1))
14331                 usage(cmd_check_usage);
14332
14333         if (ctx.progress_enabled) {
14334                 ctx.tp = TASK_NOTHING;
14335                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14336         }
14337
14338         /* This check is the only reason for --readonly to exist */
14339         if (readonly && repair) {
14340                 error("repair options are not compatible with --readonly");
14341                 exit(1);
14342         }
14343
14344         /*
14345          * experimental and dangerous
14346          */
14347         if (repair && check_mode == CHECK_MODE_LOWMEM)
14348                 warning("low-memory mode repair support is only partial");
14349
14350         radix_tree_init();
14351         cache_tree_init(&root_cache);
14352
14353         ret = check_mounted(argv[optind]);
14354         if (!force) {
14355                 if (ret < 0) {
14356                         error("could not check mount status: %s",
14357                                         strerror(-ret));
14358                         err |= !!ret;
14359                         goto err_out;
14360                 } else if (ret) {
14361                         error(
14362 "%s is currently mounted, use --force if you really intend to check the filesystem",
14363                                 argv[optind]);
14364                         ret = -EBUSY;
14365                         err |= !!ret;
14366                         goto err_out;
14367                 }
14368         } else {
14369                 if (repair) {
14370                         error("repair and --force is not yet supported");
14371                         ret = 1;
14372                         err |= !!ret;
14373                         goto err_out;
14374                 }
14375                 if (ret < 0) {
14376                         warning(
14377 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14378                                 argv[optind]);
14379                 } else if (ret) {
14380                         warning(
14381                         "filesystem mounted, continuing because of --force");
14382                 }
14383                 /* A block device is mounted in exclusive mode by kernel */
14384                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14385         }
14386
14387         /* only allow partial opening under repair mode */
14388         if (repair)
14389                 ctree_flags |= OPEN_CTREE_PARTIAL;
14390
14391         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14392                                   chunk_root_bytenr, ctree_flags);
14393         if (!info) {
14394                 error("cannot open file system");
14395                 ret = -EIO;
14396                 err |= !!ret;
14397                 goto err_out;
14398         }
14399
14400         global_info = info;
14401         root = info->fs_root;
14402         uuid_unparse(info->super_copy->fsid, uuidbuf);
14403
14404         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14405
14406         /*
14407          * Check the bare minimum before starting anything else that could rely
14408          * on it: the tree, dev and chunk roots must all be up to date.
14409          */
14410         if (!extent_buffer_uptodate(info->tree_root->node) ||
14411             !extent_buffer_uptodate(info->dev_root->node) ||
14412             !extent_buffer_uptodate(info->chunk_root->node)) {
14413                 error("critical roots corrupted, unable to check the filesystem");
14414                 err |= !!ret;
14415                 ret = -EIO;
14416                 goto close_out;
14417         }
14418
14419         if (clear_space_cache) {
14420                 ret = do_clear_free_space_cache(info, clear_space_cache);
14421                 err |= !!ret;
14422                 goto close_out;
14423         }
14424
14425         /*
14426          * Repair mode forces a transaction commit, which would make loading
14427          * the log tree fail at the next mount, so it is cleared first.
14428          */
14429         if (repair && btrfs_super_log_root(info->super_copy)) {
14430                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14431                 if (!ret) {
14432                         ret = 1;
14433                         err |= !!ret;
14434                         goto close_out;
14435                 }
14436                 ret = zero_log_tree(root);
14437                 err |= !!ret;
14438                 if (ret) {
14439                         error("failed to zero log tree: %d", ret);
14440                         goto close_out;
14441                 }
14442         }
14443
14444         if (qgroup_report) {
14445                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14446                        uuidbuf);
14447                 ret = qgroup_verify_all(info);
14448                 err |= !!ret;
14449                 if (ret == 0)
14450                         report_qgroups(1);
14451                 goto close_out;
14452         }
14453         if (subvolid) {
14454                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14455                        subvolid, argv[optind], uuidbuf);
14456                 ret = print_extent_state(info, subvolid);
14457                 err |= !!ret;
14458                 goto close_out;
14459         }
14460
14461         if (init_extent_tree || init_csum_tree) {
14462                 struct btrfs_trans_handle *trans;
14463
14464                 trans = btrfs_start_transaction(info->extent_root, 0);
14465                 if (IS_ERR(trans)) {
14466                         error("error starting transaction");
14467                         ret = PTR_ERR(trans);
14468                         err |= !!ret;
14469                         goto close_out;
14470                 }
14471
14472                 if (init_extent_tree) {
14473                         printf("Creating a new extent tree\n");
14474                         ret = reinit_extent_tree(trans, info);
14475                         err |= !!ret;
14476                         if (ret)
14477                                 goto close_out;
14478                 }
14479
14480                 if (init_csum_tree) {
14481                         printf("Reinitialize checksum tree\n");
14482                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14483                         if (ret) {
14484                                 error("checksum tree initialization failed: %d",
14485                                                 ret);
14486                                 ret = -EIO;
14487                                 err |= !!ret;
14488                                 goto close_out;
14489                         }
14490
14491                         ret = fill_csum_tree(trans, info->csum_root,
14492                                              init_extent_tree);
14493                         err |= !!ret;
14494                         if (ret) {
14495                                 error("checksum tree refilling failed: %d", ret);
14496                                 return -EIO;
14497                         }
14498                 }
14499                 /*
14500                  * Ok now we commit and run the normal fsck, which will add
14501                  * extent entries for all of the items it finds.
14502                  */
14503                 ret = btrfs_commit_transaction(trans, info->extent_root);
14504                 err |= !!ret;
14505                 if (ret)
14506                         goto close_out;
14507         }
14508         if (!extent_buffer_uptodate(info->extent_root->node)) {
14509                 error("critical: extent_root, unable to check the filesystem");
14510                 ret = -EIO;
14511                 err |= !!ret;
14512                 goto close_out;
14513         }
14514         if (!extent_buffer_uptodate(info->csum_root->node)) {
14515                 error("critical: csum_root, unable to check the filesystem");
14516                 ret = -EIO;
14517                 err |= !!ret;
14518                 goto close_out;
14519         }
14520
14521         if (!init_extent_tree) {
14522                 ret = repair_root_items(info);
14523                 if (ret < 0) {
14524                         err = !!ret;
14525                         error("failed to repair root items: %s", strerror(-ret));
14526                         goto close_out;
14527                 }
14528                 if (repair) {
14529                         fprintf(stderr, "Fixed %d roots.\n", ret);
14530                         ret = 0;
14531                 } else if (ret > 0) {
14532                         fprintf(stderr,
14533                                 "Found %d roots with an outdated root item.\n",
14534                                 ret);
14535                         fprintf(stderr,
14536         "Please run a filesystem check with the option --repair to fix them.\n");
14537                         ret = 1;
14538                         err |= ret;
14539                         goto close_out;
14540                 }
14541         }
14542
14543         ret = do_check_chunks_and_extents(info);
14544         err |= !!ret;
14545         if (ret)
14546                 error(
14547                 "errors found in extent allocation tree or chunk allocation");
14548
14549         /* Only re-check super size after we checked and repaired the fs */
14550         err |= !is_super_size_valid(info);
14551
14552         if (!ctx.progress_enabled) {
14553                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14554                         fprintf(stderr, "checking free space tree\n");
14555                 else
14556                         fprintf(stderr, "checking free space cache\n");
14557         }
14558         ret = check_space_cache(root);
14559         err |= !!ret;
14560         if (ret) {
14561                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14562                         error("errors found in free space tree");
14563                 else
14564                         error("errors found in free space cache");
14565                 goto out;
14566         }
14567
14568         /*
14569          * Filesystems without the NO_HOLES flag are expected to have explicit
14570          * hole extents between their real extents, so in that case we need to
14571          * make sure there are no gaps in the file extents of inodes. With the
14572          * flag set, such gaps can simply be ignored.
14573          */
14574         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14575         ret = do_check_fs_roots(info, &root_cache);
14576         err |= !!ret;
14577         if (ret) {
14578                 error("errors found in fs roots");
14579                 goto out;
14580         }
14581
14582         fprintf(stderr, "checking csums\n");
14583         ret = check_csums(root);
14584         err |= !!ret;
14585         if (ret) {
14586                 error("errors found in csum tree");
14587                 goto out;
14588         }
14589
14590         fprintf(stderr, "checking root refs\n");
14591         /* For low memory mode, check_fs_roots_v2 handles root refs */
14592         if (check_mode != CHECK_MODE_LOWMEM) {
14593                 ret = check_root_refs(root, &root_cache);
14594                 err |= !!ret;
14595                 if (ret) {
14596                         error("errors found in root refs");
14597                         goto out;
14598                 }
14599         }
14600
14601         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14602                 struct extent_buffer *eb;
14603
14604                 eb = list_first_entry(&root->fs_info->recow_ebs,
14605                                       struct extent_buffer, recow);
14606                 list_del_init(&eb->recow);
14607                 ret = recow_extent_buffer(root, eb);
14608                 err |= !!ret;
14609                 if (ret) {
14610                         error("fails to fix transid errors");
14611                         break;
14612                 }
14613         }
14614
14615         while (!list_empty(&delete_items)) {
14616                 struct bad_item *bad;
14617
14618                 bad = list_first_entry(&delete_items, struct bad_item, list);
14619                 list_del_init(&bad->list);
14620                 if (repair) {
14621                         ret = delete_bad_item(root, bad);
14622                         err |= !!ret;
14623                 }
14624                 free(bad);
14625         }
14626
14627         if (info->quota_enabled) {
14628                 fprintf(stderr, "checking quota groups\n");
14629                 ret = qgroup_verify_all(info);
14630                 err |= !!ret;
14631                 if (ret) {
14632                         error("failed to check quota groups");
14633                         goto out;
14634                 }
14635                 report_qgroups(0);
14636                 ret = repair_qgroups(info, &qgroups_repaired);
14637                 err |= !!ret;
14638                 if (err) {
14639                         error("failed to repair quota groups");
14640                         goto out;
14641                 }
14642                 ret = 0;
14643         }
14644
14645         if (!list_empty(&root->fs_info->recow_ebs)) {
14646                 error("transid errors in file system");
14647                 ret = 1;
14648                 err |= !!ret;
14649         }
14650 out:
14651         printf("found %llu bytes used, ",
14652                (unsigned long long)bytes_used);
14653         if (err)
14654                 printf("error(s) found\n");
14655         else
14656                 printf("no error found\n");
14657         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14658         printf("total tree bytes: %llu\n",
14659                (unsigned long long)total_btree_bytes);
14660         printf("total fs tree bytes: %llu\n",
14661                (unsigned long long)total_fs_tree_bytes);
14662         printf("total extent tree bytes: %llu\n",
14663                (unsigned long long)total_extent_tree_bytes);
14664         printf("btree space waste bytes: %llu\n",
14665                (unsigned long long)btree_space_waste);
14666         printf("file data blocks allocated: %llu\n referenced %llu\n",
14667                 (unsigned long long)data_bytes_allocated,
14668                 (unsigned long long)data_bytes_referenced);
14669
14670         free_qgroup_counts();
14671         free_root_recs_tree(&root_cache);
14672 close_out:
14673         close_ctree(root);
14674 err_out:
14675         if (ctx.progress_enabled)
14676                 task_deinit(ctx.info);
14677
14678         return err;
14679 }