79a2d595187aa8b6ce1b051b1d3a23b6d2740548
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/original.h"
47 #include "check/lowmem.h"
48 #include "check/common.h"
49
/*
 * Phases the progress indicator can report on.  The numeric values are used
 * as indexes into the message table of print_status_check().
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE,
	TASK_FS_ROOTS,
	/*
	 * No message exists for this value, print_status_check() returns
	 * early when it sees it.  Has to be the last element.
	 */
	TASK_NOTHING,
};
56
57 struct task_ctx {
58         int progress_enabled;
59         enum task_position tp;
60
61         struct task_info *info;
62 };
63
64 u64 bytes_used = 0;
65 u64 total_csum_bytes = 0;
66 u64 total_btree_bytes = 0;
67 u64 total_fs_tree_bytes = 0;
68 u64 total_extent_tree_bytes = 0;
69 u64 btree_space_waste = 0;
70 u64 data_bytes_allocated = 0;
71 u64 data_bytes_referenced = 0;
72 LIST_HEAD(duplicate_extents);
73 LIST_HEAD(delete_items);
74 int no_holes = 0;
75 int init_extent_tree = 0;
76 int check_data_csum = 0;
77 struct btrfs_fs_info *global_info;
78 struct task_ctx ctx = { 0 };
79 struct cache_tree *roots_info_cache = NULL;
80
/* Check implementations that can be selected by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL,
	CHECK_MODE_LOWMEM,
	/* Result of parse_check_mode() for a name it does not recognize */
	CHECK_MODE_UNKNOWN,
	/* Alias: the original implementation is the default one */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode used by this run, starts as the default */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
89
90 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
91 {
92         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
93         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
94         struct data_backref *back1 = to_data_backref(ext1);
95         struct data_backref *back2 = to_data_backref(ext2);
96
97         WARN_ON(!ext1->is_data);
98         WARN_ON(!ext2->is_data);
99
100         /* parent and root are a union, so this covers both */
101         if (back1->parent > back2->parent)
102                 return 1;
103         if (back1->parent < back2->parent)
104                 return -1;
105
106         /* This is a full backref and the parents match. */
107         if (back1->node.full_backref)
108                 return 0;
109
110         if (back1->owner > back2->owner)
111                 return 1;
112         if (back1->owner < back2->owner)
113                 return -1;
114
115         if (back1->offset > back2->offset)
116                 return 1;
117         if (back1->offset < back2->offset)
118                 return -1;
119
120         if (back1->found_ref && back2->found_ref) {
121                 if (back1->disk_bytenr > back2->disk_bytenr)
122                         return 1;
123                 if (back1->disk_bytenr < back2->disk_bytenr)
124                         return -1;
125
126                 if (back1->bytes > back2->bytes)
127                         return 1;
128                 if (back1->bytes < back2->bytes)
129                         return -1;
130         }
131
132         return 0;
133 }
134
135 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
136 {
137         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
138         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
139         struct tree_backref *back1 = to_tree_backref(ext1);
140         struct tree_backref *back2 = to_tree_backref(ext2);
141
142         WARN_ON(ext1->is_data);
143         WARN_ON(ext2->is_data);
144
145         /* parent and root are a union, so this covers both */
146         if (back1->parent > back2->parent)
147                 return 1;
148         if (back1->parent < back2->parent)
149                 return -1;
150
151         return 0;
152 }
153
154 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
155 {
156         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
157         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
158
159         if (ext1->is_data > ext2->is_data)
160                 return 1;
161
162         if (ext1->is_data < ext2->is_data)
163                 return -1;
164
165         if (ext1->full_backref > ext2->full_backref)
166                 return 1;
167         if (ext1->full_backref < ext2->full_backref)
168                 return -1;
169
170         if (ext1->is_data)
171                 return compare_data_backref(node1, node2);
172         else
173                 return compare_tree_backref(node1, node2);
174 }
175
176
177 static void *print_status_check(void *p)
178 {
179         struct task_ctx *priv = p;
180         const char work_indicator[] = { '.', 'o', 'O', 'o' };
181         uint32_t count = 0;
182         static char *task_position_string[] = {
183                 "checking extents",
184                 "checking free space cache",
185                 "checking fs roots",
186         };
187
188         task_period_start(priv->info, 1000 /* 1s */);
189
190         if (priv->tp == TASK_NOTHING)
191                 return NULL;
192
193         while (1) {
194                 printf("%s [%c]\r", task_position_string[priv->tp],
195                                 work_indicator[count % 4]);
196                 count++;
197                 fflush(stdout);
198                 task_period_wait(priv->info);
199         }
200         return NULL;
201 }
202
/*
 * Progress task epilogue: terminate the status line with a newline and flush
 * stdout.  @p is not used.  Always returns 0.
 */
static int print_status_return(void *p)
{
	(void)p;

	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
210
211 static enum btrfs_check_mode parse_check_mode(const char *str)
212 {
213         if (strcmp(str, "lowmem") == 0)
214                 return CHECK_MODE_LOWMEM;
215         if (strcmp(str, "orig") == 0)
216                 return CHECK_MODE_ORIGINAL;
217         if (strcmp(str, "original") == 0)
218                 return CHECK_MODE_ORIGINAL;
219
220         return CHECK_MODE_UNKNOWN;
221 }
222
223 /* Compatible function to allow reuse of old codes */
224 static u64 first_extent_gap(struct rb_root *holes)
225 {
226         struct file_extent_hole *hole;
227
228         if (RB_EMPTY_ROOT(holes))
229                 return (u64)-1;
230
231         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
232         return hole->start;
233 }
234
235 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
236 {
237         struct file_extent_hole *hole1;
238         struct file_extent_hole *hole2;
239
240         hole1 = rb_entry(node1, struct file_extent_hole, node);
241         hole2 = rb_entry(node2, struct file_extent_hole, node);
242
243         if (hole1->start > hole2->start)
244                 return -1;
245         if (hole1->start < hole2->start)
246                 return 1;
247         /* Now hole1->start == hole2->start */
248         if (hole1->len >= hole2->len)
249                 /*
250                  * Hole 1 will be merge center
251                  * Same hole will be merged later
252                  */
253                 return -1;
254         /* Hole 2 will be merge center */
255         return 1;
256 }
257
258 /*
259  * Add a hole to the record
260  *
261  * This will do hole merge for copy_file_extent_holes(),
262  * which will ensure there won't be continuous holes.
263  */
264 static int add_file_extent_hole(struct rb_root *holes,
265                                 u64 start, u64 len)
266 {
267         struct file_extent_hole *hole;
268         struct file_extent_hole *prev = NULL;
269         struct file_extent_hole *next = NULL;
270
271         hole = malloc(sizeof(*hole));
272         if (!hole)
273                 return -ENOMEM;
274         hole->start = start;
275         hole->len = len;
276         /* Since compare will not return 0, no -EEXIST will happen */
277         rb_insert(holes, &hole->node, compare_hole);
278
279         /* simple merge with previous hole */
280         if (rb_prev(&hole->node))
281                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
282                                 node);
283         if (prev && prev->start + prev->len >= hole->start) {
284                 hole->len = hole->start + hole->len - prev->start;
285                 hole->start = prev->start;
286                 rb_erase(&prev->node, holes);
287                 free(prev);
288                 prev = NULL;
289         }
290
291         /* iterate merge with next holes */
292         while (1) {
293                 if (!rb_next(&hole->node))
294                         break;
295                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
296                                         node);
297                 if (hole->start + hole->len >= next->start) {
298                         if (hole->start + hole->len <= next->start + next->len)
299                                 hole->len = next->start + next->len -
300                                             hole->start;
301                         rb_erase(&next->node, holes);
302                         free(next);
303                         next = NULL;
304                 } else
305                         break;
306         }
307         return 0;
308 }
309
310 static int compare_hole_range(struct rb_node *node, void *data)
311 {
312         struct file_extent_hole *hole;
313         u64 start;
314
315         hole = (struct file_extent_hole *)data;
316         start = hole->start;
317
318         hole = rb_entry(node, struct file_extent_hole, node);
319         if (start < hole->start)
320                 return -1;
321         if (start >= hole->start && start < hole->start + hole->len)
322                 return 0;
323         return 1;
324 }
325
326 /*
327  * Delete a hole in the record
328  *
329  * This will do the hole split and is much restrict than add.
330  */
331 static int del_file_extent_hole(struct rb_root *holes,
332                                 u64 start, u64 len)
333 {
334         struct file_extent_hole *hole;
335         struct file_extent_hole tmp;
336         u64 prev_start = 0;
337         u64 prev_len = 0;
338         u64 next_start = 0;
339         u64 next_len = 0;
340         struct rb_node *node;
341         int have_prev = 0;
342         int have_next = 0;
343         int ret = 0;
344
345         tmp.start = start;
346         tmp.len = len;
347         node = rb_search(holes, &tmp, compare_hole_range, NULL);
348         if (!node)
349                 return -EEXIST;
350         hole = rb_entry(node, struct file_extent_hole, node);
351         if (start + len > hole->start + hole->len)
352                 return -EEXIST;
353
354         /*
355          * Now there will be no overlap, delete the hole and re-add the
356          * split(s) if they exists.
357          */
358         if (start > hole->start) {
359                 prev_start = hole->start;
360                 prev_len = start - hole->start;
361                 have_prev = 1;
362         }
363         if (hole->start + hole->len > start + len) {
364                 next_start = start + len;
365                 next_len = hole->start + hole->len - start - len;
366                 have_next = 1;
367         }
368         rb_erase(node, holes);
369         free(hole);
370         if (have_prev) {
371                 ret = add_file_extent_hole(holes, prev_start, prev_len);
372                 if (ret < 0)
373                         return ret;
374         }
375         if (have_next) {
376                 ret = add_file_extent_hole(holes, next_start, next_len);
377                 if (ret < 0)
378                         return ret;
379         }
380         return 0;
381 }
382
383 static int copy_file_extent_holes(struct rb_root *dst,
384                                   struct rb_root *src)
385 {
386         struct file_extent_hole *hole;
387         struct rb_node *node;
388         int ret = 0;
389
390         node = rb_first(src);
391         while (node) {
392                 hole = rb_entry(node, struct file_extent_hole, node);
393                 ret = add_file_extent_hole(dst, hole->start, hole->len);
394                 if (ret)
395                         break;
396                 node = rb_next(node);
397         }
398         return ret;
399 }
400
401 static void free_file_extent_holes(struct rb_root *holes)
402 {
403         struct rb_node *node;
404         struct file_extent_hole *hole;
405
406         node = rb_first(holes);
407         while (node) {
408                 hole = rb_entry(node, struct file_extent_hole, node);
409                 rb_erase(node, holes);
410                 free(hole);
411                 node = rb_first(holes);
412         }
413 }
414
415 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
416
417 static void record_root_in_trans(struct btrfs_trans_handle *trans,
418                                  struct btrfs_root *root)
419 {
420         if (root->last_trans != trans->transid) {
421                 root->track_dirty = 1;
422                 root->last_trans = trans->transid;
423                 root->commit_root = root->node;
424                 extent_buffer_get(root->node);
425         }
426 }
427
428 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
429 {
430         struct device_record *rec1;
431         struct device_record *rec2;
432
433         rec1 = rb_entry(node1, struct device_record, node);
434         rec2 = rb_entry(node2, struct device_record, node);
435         if (rec1->devid > rec2->devid)
436                 return -1;
437         else if (rec1->devid < rec2->devid)
438                 return 1;
439         else
440                 return 0;
441 }
442
443 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
444 {
445         struct inode_record *rec;
446         struct inode_backref *backref;
447         struct inode_backref *orig;
448         struct inode_backref *tmp;
449         struct orphan_data_extent *src_orphan;
450         struct orphan_data_extent *dst_orphan;
451         struct rb_node *rb;
452         size_t size;
453         int ret;
454
455         rec = malloc(sizeof(*rec));
456         if (!rec)
457                 return ERR_PTR(-ENOMEM);
458         memcpy(rec, orig_rec, sizeof(*rec));
459         rec->refs = 1;
460         INIT_LIST_HEAD(&rec->backrefs);
461         INIT_LIST_HEAD(&rec->orphan_extents);
462         rec->holes = RB_ROOT;
463
464         list_for_each_entry(orig, &orig_rec->backrefs, list) {
465                 size = sizeof(*orig) + orig->namelen + 1;
466                 backref = malloc(size);
467                 if (!backref) {
468                         ret = -ENOMEM;
469                         goto cleanup;
470                 }
471                 memcpy(backref, orig, size);
472                 list_add_tail(&backref->list, &rec->backrefs);
473         }
474         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
475                 dst_orphan = malloc(sizeof(*dst_orphan));
476                 if (!dst_orphan) {
477                         ret = -ENOMEM;
478                         goto cleanup;
479                 }
480                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
481                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
482         }
483         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
484         if (ret < 0)
485                 goto cleanup_rb;
486
487         return rec;
488
489 cleanup_rb:
490         rb = rb_first(&rec->holes);
491         while (rb) {
492                 struct file_extent_hole *hole;
493
494                 hole = rb_entry(rb, struct file_extent_hole, node);
495                 rb = rb_next(rb);
496                 free(hole);
497         }
498
499 cleanup:
500         if (!list_empty(&rec->backrefs))
501                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
502                         list_del(&orig->list);
503                         free(orig);
504                 }
505
506         if (!list_empty(&rec->orphan_extents))
507                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
508                         list_del(&orig->list);
509                         free(orig);
510                 }
511
512         free(rec);
513
514         return ERR_PTR(ret);
515 }
516
517 static void print_orphan_data_extents(struct list_head *orphan_extents,
518                                       u64 objectid)
519 {
520         struct orphan_data_extent *orphan;
521
522         if (list_empty(orphan_extents))
523                 return;
524         printf("The following data extent is lost in tree %llu:\n",
525                objectid);
526         list_for_each_entry(orphan, orphan_extents, list) {
527                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
528                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
529                        orphan->disk_len);
530         }
531 }
532
533 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
534 {
535         u64 root_objectid = root->root_key.objectid;
536         int errors = rec->errors;
537
538         if (!errors)
539                 return;
540         /* reloc root errors, we print its corresponding fs root objectid*/
541         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
542                 root_objectid = root->root_key.offset;
543                 fprintf(stderr, "reloc");
544         }
545         fprintf(stderr, "root %llu inode %llu errors %x",
546                 (unsigned long long) root_objectid,
547                 (unsigned long long) rec->ino, rec->errors);
548
549         if (errors & I_ERR_NO_INODE_ITEM)
550                 fprintf(stderr, ", no inode item");
551         if (errors & I_ERR_NO_ORPHAN_ITEM)
552                 fprintf(stderr, ", no orphan item");
553         if (errors & I_ERR_DUP_INODE_ITEM)
554                 fprintf(stderr, ", dup inode item");
555         if (errors & I_ERR_DUP_DIR_INDEX)
556                 fprintf(stderr, ", dup dir index");
557         if (errors & I_ERR_ODD_DIR_ITEM)
558                 fprintf(stderr, ", odd dir item");
559         if (errors & I_ERR_ODD_FILE_EXTENT)
560                 fprintf(stderr, ", odd file extent");
561         if (errors & I_ERR_BAD_FILE_EXTENT)
562                 fprintf(stderr, ", bad file extent");
563         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
564                 fprintf(stderr, ", file extent overlap");
565         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
566                 fprintf(stderr, ", file extent discount");
567         if (errors & I_ERR_DIR_ISIZE_WRONG)
568                 fprintf(stderr, ", dir isize wrong");
569         if (errors & I_ERR_FILE_NBYTES_WRONG)
570                 fprintf(stderr, ", nbytes wrong");
571         if (errors & I_ERR_ODD_CSUM_ITEM)
572                 fprintf(stderr, ", odd csum item");
573         if (errors & I_ERR_SOME_CSUM_MISSING)
574                 fprintf(stderr, ", some csum missing");
575         if (errors & I_ERR_LINK_COUNT_WRONG)
576                 fprintf(stderr, ", link count wrong");
577         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
578                 fprintf(stderr, ", orphan file extent");
579         fprintf(stderr, "\n");
580         /* Print the orphan extents if needed */
581         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
582                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
583
584         /* Print the holes if needed */
585         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
586                 struct file_extent_hole *hole;
587                 struct rb_node *node;
588                 int found = 0;
589
590                 node = rb_first(&rec->holes);
591                 fprintf(stderr, "Found file extent holes:\n");
592                 while (node) {
593                         found = 1;
594                         hole = rb_entry(node, struct file_extent_hole, node);
595                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
596                                 hole->start, hole->len);
597                         node = rb_next(node);
598                 }
599                 if (!found)
600                         fprintf(stderr, "\tstart: 0, len: %llu\n",
601                                 round_up(rec->isize,
602                                          root->fs_info->sectorsize));
603         }
604 }
605
606 static void print_ref_error(int errors)
607 {
608         if (errors & REF_ERR_NO_DIR_ITEM)
609                 fprintf(stderr, ", no dir item");
610         if (errors & REF_ERR_NO_DIR_INDEX)
611                 fprintf(stderr, ", no dir index");
612         if (errors & REF_ERR_NO_INODE_REF)
613                 fprintf(stderr, ", no inode ref");
614         if (errors & REF_ERR_DUP_DIR_ITEM)
615                 fprintf(stderr, ", dup dir item");
616         if (errors & REF_ERR_DUP_DIR_INDEX)
617                 fprintf(stderr, ", dup dir index");
618         if (errors & REF_ERR_DUP_INODE_REF)
619                 fprintf(stderr, ", dup inode ref");
620         if (errors & REF_ERR_INDEX_UNMATCH)
621                 fprintf(stderr, ", index mismatch");
622         if (errors & REF_ERR_FILETYPE_UNMATCH)
623                 fprintf(stderr, ", filetype mismatch");
624         if (errors & REF_ERR_NAME_TOO_LONG)
625                 fprintf(stderr, ", name too long");
626         if (errors & REF_ERR_NO_ROOT_REF)
627                 fprintf(stderr, ", no root ref");
628         if (errors & REF_ERR_NO_ROOT_BACKREF)
629                 fprintf(stderr, ", no root backref");
630         if (errors & REF_ERR_DUP_ROOT_REF)
631                 fprintf(stderr, ", dup root ref");
632         if (errors & REF_ERR_DUP_ROOT_BACKREF)
633                 fprintf(stderr, ", dup root backref");
634         fprintf(stderr, "\n");
635 }
636
637 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
638                                           u64 ino, int mod)
639 {
640         struct ptr_node *node;
641         struct cache_extent *cache;
642         struct inode_record *rec = NULL;
643         int ret;
644
645         cache = lookup_cache_extent(inode_cache, ino, 1);
646         if (cache) {
647                 node = container_of(cache, struct ptr_node, cache);
648                 rec = node->data;
649                 if (mod && rec->refs > 1) {
650                         node->data = clone_inode_rec(rec);
651                         if (IS_ERR(node->data))
652                                 return node->data;
653                         rec->refs--;
654                         rec = node->data;
655                 }
656         } else if (mod) {
657                 rec = calloc(1, sizeof(*rec));
658                 if (!rec)
659                         return ERR_PTR(-ENOMEM);
660                 rec->ino = ino;
661                 rec->extent_start = (u64)-1;
662                 rec->refs = 1;
663                 INIT_LIST_HEAD(&rec->backrefs);
664                 INIT_LIST_HEAD(&rec->orphan_extents);
665                 rec->holes = RB_ROOT;
666
667                 node = malloc(sizeof(*node));
668                 if (!node) {
669                         free(rec);
670                         return ERR_PTR(-ENOMEM);
671                 }
672                 node->cache.start = ino;
673                 node->cache.size = 1;
674                 node->data = rec;
675
676                 if (ino == BTRFS_FREE_INO_OBJECTID)
677                         rec->found_link = 1;
678
679                 ret = insert_cache_extent(inode_cache, &node->cache);
680                 if (ret)
681                         return ERR_PTR(-EEXIST);
682         }
683         return rec;
684 }
685
686 static void free_orphan_data_extents(struct list_head *orphan_extents)
687 {
688         struct orphan_data_extent *orphan;
689
690         while (!list_empty(orphan_extents)) {
691                 orphan = list_entry(orphan_extents->next,
692                                     struct orphan_data_extent, list);
693                 list_del(&orphan->list);
694                 free(orphan);
695         }
696 }
697
698 static void free_inode_rec(struct inode_record *rec)
699 {
700         struct inode_backref *backref;
701
702         if (--rec->refs > 0)
703                 return;
704
705         while (!list_empty(&rec->backrefs)) {
706                 backref = to_inode_backref(rec->backrefs.next);
707                 list_del(&backref->list);
708                 free(backref);
709         }
710         free_orphan_data_extents(&rec->orphan_extents);
711         free_file_extent_holes(&rec->holes);
712         free(rec);
713 }
714
715 static int can_free_inode_rec(struct inode_record *rec)
716 {
717         if (!rec->errors && rec->checked && rec->found_inode_item &&
718             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
719                 return 1;
720         return 0;
721 }
722
723 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
724                                  struct inode_record *rec)
725 {
726         struct cache_extent *cache;
727         struct inode_backref *tmp, *backref;
728         struct ptr_node *node;
729         u8 filetype;
730
731         if (!rec->found_inode_item)
732                 return;
733
734         filetype = imode_to_type(rec->imode);
735         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
736                 if (backref->found_dir_item && backref->found_dir_index) {
737                         if (backref->filetype != filetype)
738                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
739                         if (!backref->errors && backref->found_inode_ref &&
740                             rec->nlink == rec->found_link) {
741                                 list_del(&backref->list);
742                                 free(backref);
743                         }
744                 }
745         }
746
747         if (!rec->checked || rec->merging)
748                 return;
749
750         if (S_ISDIR(rec->imode)) {
751                 if (rec->found_size != rec->isize)
752                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
753                 if (rec->found_file_extent)
754                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
755         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
756                 if (rec->found_dir_item)
757                         rec->errors |= I_ERR_ODD_DIR_ITEM;
758                 if (rec->found_size != rec->nbytes)
759                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
760                 if (rec->nlink > 0 && !no_holes &&
761                     (rec->extent_end < rec->isize ||
762                      first_extent_gap(&rec->holes) < rec->isize))
763                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
764         }
765
766         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
767                 if (rec->found_csum_item && rec->nodatasum)
768                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
769                 if (rec->some_csum_missing && !rec->nodatasum)
770                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
771         }
772
773         BUG_ON(rec->refs != 1);
774         if (can_free_inode_rec(rec)) {
775                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
776                 node = container_of(cache, struct ptr_node, cache);
777                 BUG_ON(node->data != rec);
778                 remove_cache_extent(inode_cache, &node->cache);
779                 free(node);
780                 free_inode_rec(rec);
781         }
782 }
783
784 static int check_orphan_item(struct btrfs_root *root, u64 ino)
785 {
786         struct btrfs_path path;
787         struct btrfs_key key;
788         int ret;
789
790         key.objectid = BTRFS_ORPHAN_OBJECTID;
791         key.type = BTRFS_ORPHAN_ITEM_KEY;
792         key.offset = ino;
793
794         btrfs_init_path(&path);
795         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
796         btrfs_release_path(&path);
797         if (ret > 0)
798                 ret = -ENOENT;
799         return ret;
800 }
801
802 static int process_inode_item(struct extent_buffer *eb,
803                               int slot, struct btrfs_key *key,
804                               struct shared_node *active_node)
805 {
806         struct inode_record *rec;
807         struct btrfs_inode_item *item;
808
809         rec = active_node->current;
810         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
811         if (rec->found_inode_item) {
812                 rec->errors |= I_ERR_DUP_INODE_ITEM;
813                 return 1;
814         }
815         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
816         rec->nlink = btrfs_inode_nlink(eb, item);
817         rec->isize = btrfs_inode_size(eb, item);
818         rec->nbytes = btrfs_inode_nbytes(eb, item);
819         rec->imode = btrfs_inode_mode(eb, item);
820         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
821                 rec->nodatasum = 1;
822         rec->found_inode_item = 1;
823         if (rec->nlink == 0)
824                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
825         maybe_free_inode_rec(&active_node->inode_cache, rec);
826         return 0;
827 }
828
829 static struct inode_backref *get_inode_backref(struct inode_record *rec,
830                                                 const char *name,
831                                                 int namelen, u64 dir)
832 {
833         struct inode_backref *backref;
834
835         list_for_each_entry(backref, &rec->backrefs, list) {
836                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
837                         break;
838                 if (backref->dir != dir || backref->namelen != namelen)
839                         continue;
840                 if (memcmp(name, backref->name, namelen))
841                         continue;
842                 return backref;
843         }
844
845         backref = malloc(sizeof(*backref) + namelen + 1);
846         if (!backref)
847                 return NULL;
848         memset(backref, 0, sizeof(*backref));
849         backref->dir = dir;
850         backref->namelen = namelen;
851         memcpy(backref->name, name, namelen);
852         backref->name[namelen] = '\0';
853         list_add_tail(&backref->list, &rec->backrefs);
854         return backref;
855 }
856
857 static int add_inode_backref(struct cache_tree *inode_cache,
858                              u64 ino, u64 dir, u64 index,
859                              const char *name, int namelen,
860                              u8 filetype, u8 itemtype, int errors)
861 {
862         struct inode_record *rec;
863         struct inode_backref *backref;
864
865         rec = get_inode_rec(inode_cache, ino, 1);
866         BUG_ON(IS_ERR(rec));
867         backref = get_inode_backref(rec, name, namelen, dir);
868         BUG_ON(!backref);
869         if (errors)
870                 backref->errors |= errors;
871         if (itemtype == BTRFS_DIR_INDEX_KEY) {
872                 if (backref->found_dir_index)
873                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
874                 if (backref->found_inode_ref && backref->index != index)
875                         backref->errors |= REF_ERR_INDEX_UNMATCH;
876                 if (backref->found_dir_item && backref->filetype != filetype)
877                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
878
879                 backref->index = index;
880                 backref->filetype = filetype;
881                 backref->found_dir_index = 1;
882         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
883                 rec->found_link++;
884                 if (backref->found_dir_item)
885                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
886                 if (backref->found_dir_index && backref->filetype != filetype)
887                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
888
889                 backref->filetype = filetype;
890                 backref->found_dir_item = 1;
891         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
892                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
893                 if (backref->found_inode_ref)
894                         backref->errors |= REF_ERR_DUP_INODE_REF;
895                 if (backref->found_dir_index && backref->index != index)
896                         backref->errors |= REF_ERR_INDEX_UNMATCH;
897                 else
898                         backref->index = index;
899
900                 backref->ref_type = itemtype;
901                 backref->found_inode_ref = 1;
902         } else {
903                 BUG_ON(1);
904         }
905
906         maybe_free_inode_rec(inode_cache, rec);
907         return 0;
908 }
909
910 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
911                             struct cache_tree *dst_cache)
912 {
913         struct inode_backref *backref;
914         u32 dir_count = 0;
915         int ret = 0;
916
917         dst->merging = 1;
918         list_for_each_entry(backref, &src->backrefs, list) {
919                 if (backref->found_dir_index) {
920                         add_inode_backref(dst_cache, dst->ino, backref->dir,
921                                         backref->index, backref->name,
922                                         backref->namelen, backref->filetype,
923                                         BTRFS_DIR_INDEX_KEY, backref->errors);
924                 }
925                 if (backref->found_dir_item) {
926                         dir_count++;
927                         add_inode_backref(dst_cache, dst->ino,
928                                         backref->dir, 0, backref->name,
929                                         backref->namelen, backref->filetype,
930                                         BTRFS_DIR_ITEM_KEY, backref->errors);
931                 }
932                 if (backref->found_inode_ref) {
933                         add_inode_backref(dst_cache, dst->ino,
934                                         backref->dir, backref->index,
935                                         backref->name, backref->namelen, 0,
936                                         backref->ref_type, backref->errors);
937                 }
938         }
939
940         if (src->found_dir_item)
941                 dst->found_dir_item = 1;
942         if (src->found_file_extent)
943                 dst->found_file_extent = 1;
944         if (src->found_csum_item)
945                 dst->found_csum_item = 1;
946         if (src->some_csum_missing)
947                 dst->some_csum_missing = 1;
948         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
949                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
950                 if (ret < 0)
951                         return ret;
952         }
953
954         BUG_ON(src->found_link < dir_count);
955         dst->found_link += src->found_link - dir_count;
956         dst->found_size += src->found_size;
957         if (src->extent_start != (u64)-1) {
958                 if (dst->extent_start == (u64)-1) {
959                         dst->extent_start = src->extent_start;
960                         dst->extent_end = src->extent_end;
961                 } else {
962                         if (dst->extent_end > src->extent_start)
963                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
964                         else if (dst->extent_end < src->extent_start) {
965                                 ret = add_file_extent_hole(&dst->holes,
966                                         dst->extent_end,
967                                         src->extent_start - dst->extent_end);
968                         }
969                         if (dst->extent_end < src->extent_end)
970                                 dst->extent_end = src->extent_end;
971                 }
972         }
973
974         dst->errors |= src->errors;
975         if (src->found_inode_item) {
976                 if (!dst->found_inode_item) {
977                         dst->nlink = src->nlink;
978                         dst->isize = src->isize;
979                         dst->nbytes = src->nbytes;
980                         dst->imode = src->imode;
981                         dst->nodatasum = src->nodatasum;
982                         dst->found_inode_item = 1;
983                 } else {
984                         dst->errors |= I_ERR_DUP_INODE_ITEM;
985                 }
986         }
987         dst->merging = 0;
988
989         return 0;
990 }
991
992 static int splice_shared_node(struct shared_node *src_node,
993                               struct shared_node *dst_node)
994 {
995         struct cache_extent *cache;
996         struct ptr_node *node, *ins;
997         struct cache_tree *src, *dst;
998         struct inode_record *rec, *conflict;
999         u64 current_ino = 0;
1000         int splice = 0;
1001         int ret;
1002
1003         if (--src_node->refs == 0)
1004                 splice = 1;
1005         if (src_node->current)
1006                 current_ino = src_node->current->ino;
1007
1008         src = &src_node->root_cache;
1009         dst = &dst_node->root_cache;
1010 again:
1011         cache = search_cache_extent(src, 0);
1012         while (cache) {
1013                 node = container_of(cache, struct ptr_node, cache);
1014                 rec = node->data;
1015                 cache = next_cache_extent(cache);
1016
1017                 if (splice) {
1018                         remove_cache_extent(src, &node->cache);
1019                         ins = node;
1020                 } else {
1021                         ins = malloc(sizeof(*ins));
1022                         BUG_ON(!ins);
1023                         ins->cache.start = node->cache.start;
1024                         ins->cache.size = node->cache.size;
1025                         ins->data = rec;
1026                         rec->refs++;
1027                 }
1028                 ret = insert_cache_extent(dst, &ins->cache);
1029                 if (ret == -EEXIST) {
1030                         conflict = get_inode_rec(dst, rec->ino, 1);
1031                         BUG_ON(IS_ERR(conflict));
1032                         merge_inode_recs(rec, conflict, dst);
1033                         if (rec->checked) {
1034                                 conflict->checked = 1;
1035                                 if (dst_node->current == conflict)
1036                                         dst_node->current = NULL;
1037                         }
1038                         maybe_free_inode_rec(dst, conflict);
1039                         free_inode_rec(rec);
1040                         free(ins);
1041                 } else {
1042                         BUG_ON(ret);
1043                 }
1044         }
1045
1046         if (src == &src_node->root_cache) {
1047                 src = &src_node->inode_cache;
1048                 dst = &dst_node->inode_cache;
1049                 goto again;
1050         }
1051
1052         if (current_ino > 0 && (!dst_node->current ||
1053             current_ino > dst_node->current->ino)) {
1054                 if (dst_node->current) {
1055                         dst_node->current->checked = 1;
1056                         maybe_free_inode_rec(dst, dst_node->current);
1057                 }
1058                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1059                 BUG_ON(IS_ERR(dst_node->current));
1060         }
1061         return 0;
1062 }
1063
1064 static void free_inode_ptr(struct cache_extent *cache)
1065 {
1066         struct ptr_node *node;
1067         struct inode_record *rec;
1068
1069         node = container_of(cache, struct ptr_node, cache);
1070         rec = node->data;
1071         free_inode_rec(rec);
1072         free(node);
1073 }
1074
1075 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1076
1077 static struct shared_node *find_shared_node(struct cache_tree *shared,
1078                                             u64 bytenr)
1079 {
1080         struct cache_extent *cache;
1081         struct shared_node *node;
1082
1083         cache = lookup_cache_extent(shared, bytenr, 1);
1084         if (cache) {
1085                 node = container_of(cache, struct shared_node, cache);
1086                 return node;
1087         }
1088         return NULL;
1089 }
1090
1091 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1092 {
1093         int ret;
1094         struct shared_node *node;
1095
1096         node = calloc(1, sizeof(*node));
1097         if (!node)
1098                 return -ENOMEM;
1099         node->cache.start = bytenr;
1100         node->cache.size = 1;
1101         cache_tree_init(&node->root_cache);
1102         cache_tree_init(&node->inode_cache);
1103         node->refs = refs;
1104
1105         ret = insert_cache_extent(shared, &node->cache);
1106
1107         return ret;
1108 }
1109
1110 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1111                              struct walk_control *wc, int level)
1112 {
1113         struct shared_node *node;
1114         struct shared_node *dest;
1115         int ret;
1116
1117         if (level == wc->active_node)
1118                 return 0;
1119
1120         BUG_ON(wc->active_node <= level);
1121         node = find_shared_node(&wc->shared, bytenr);
1122         if (!node) {
1123                 ret = add_shared_node(&wc->shared, bytenr, refs);
1124                 BUG_ON(ret);
1125                 node = find_shared_node(&wc->shared, bytenr);
1126                 wc->nodes[level] = node;
1127                 wc->active_node = level;
1128                 return 0;
1129         }
1130
1131         if (wc->root_level == wc->active_node &&
1132             btrfs_root_refs(&root->root_item) == 0) {
1133                 if (--node->refs == 0) {
1134                         free_inode_recs_tree(&node->root_cache);
1135                         free_inode_recs_tree(&node->inode_cache);
1136                         remove_cache_extent(&wc->shared, &node->cache);
1137                         free(node);
1138                 }
1139                 return 1;
1140         }
1141
1142         dest = wc->nodes[wc->active_node];
1143         splice_shared_node(node, dest);
1144         if (node->refs == 0) {
1145                 remove_cache_extent(&wc->shared, &node->cache);
1146                 free(node);
1147         }
1148         return 1;
1149 }
1150
1151 static int leave_shared_node(struct btrfs_root *root,
1152                              struct walk_control *wc, int level)
1153 {
1154         struct shared_node *node;
1155         struct shared_node *dest;
1156         int i;
1157
1158         if (level == wc->root_level)
1159                 return 0;
1160
1161         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1162                 if (wc->nodes[i])
1163                         break;
1164         }
1165         BUG_ON(i >= BTRFS_MAX_LEVEL);
1166
1167         node = wc->nodes[wc->active_node];
1168         wc->nodes[wc->active_node] = NULL;
1169         wc->active_node = i;
1170
1171         dest = wc->nodes[wc->active_node];
1172         if (wc->active_node < wc->root_level ||
1173             btrfs_root_refs(&root->root_item) > 0) {
1174                 BUG_ON(node->refs <= 1);
1175                 splice_shared_node(node, dest);
1176         } else {
1177                 BUG_ON(node->refs < 2);
1178                 node->refs--;
1179         }
1180         return 0;
1181 }
1182
1183 /*
1184  * Returns:
1185  * < 0 - on error
1186  * 1   - if the root with id child_root_id is a child of root parent_root_id
1187  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1188  *       has other root(s) as parent(s)
1189  * 2   - if the root child_root_id doesn't have any parent roots
1190  */
1191 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1192                          u64 child_root_id)
1193 {
1194         struct btrfs_path path;
1195         struct btrfs_key key;
1196         struct extent_buffer *leaf;
1197         int has_parent = 0;
1198         int ret;
1199
1200         btrfs_init_path(&path);
1201
1202         key.objectid = parent_root_id;
1203         key.type = BTRFS_ROOT_REF_KEY;
1204         key.offset = child_root_id;
1205         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1206                                 0, 0);
1207         if (ret < 0)
1208                 return ret;
1209         btrfs_release_path(&path);
1210         if (!ret)
1211                 return 1;
1212
1213         key.objectid = child_root_id;
1214         key.type = BTRFS_ROOT_BACKREF_KEY;
1215         key.offset = 0;
1216         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1217                                 0, 0);
1218         if (ret < 0)
1219                 goto out;
1220
1221         while (1) {
1222                 leaf = path.nodes[0];
1223                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1224                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1225                         if (ret)
1226                                 break;
1227                         leaf = path.nodes[0];
1228                 }
1229
1230                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1231                 if (key.objectid != child_root_id ||
1232                     key.type != BTRFS_ROOT_BACKREF_KEY)
1233                         break;
1234
1235                 has_parent = 1;
1236
1237                 if (key.offset == parent_root_id) {
1238                         btrfs_release_path(&path);
1239                         return 1;
1240                 }
1241
1242                 path.slots[0]++;
1243         }
1244 out:
1245         btrfs_release_path(&path);
1246         if (ret < 0)
1247                 return ret;
1248         return has_parent ? 0 : 2;
1249 }
1250
1251 static int process_dir_item(struct extent_buffer *eb,
1252                             int slot, struct btrfs_key *key,
1253                             struct shared_node *active_node)
1254 {
1255         u32 total;
1256         u32 cur = 0;
1257         u32 len;
1258         u32 name_len;
1259         u32 data_len;
1260         int error;
1261         int nritems = 0;
1262         u8 filetype;
1263         struct btrfs_dir_item *di;
1264         struct inode_record *rec;
1265         struct cache_tree *root_cache;
1266         struct cache_tree *inode_cache;
1267         struct btrfs_key location;
1268         char namebuf[BTRFS_NAME_LEN];
1269
1270         root_cache = &active_node->root_cache;
1271         inode_cache = &active_node->inode_cache;
1272         rec = active_node->current;
1273         rec->found_dir_item = 1;
1274
1275         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1276         total = btrfs_item_size_nr(eb, slot);
1277         while (cur < total) {
1278                 nritems++;
1279                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1280                 name_len = btrfs_dir_name_len(eb, di);
1281                 data_len = btrfs_dir_data_len(eb, di);
1282                 filetype = btrfs_dir_type(eb, di);
1283
1284                 rec->found_size += name_len;
1285                 if (cur + sizeof(*di) + name_len > total ||
1286                     name_len > BTRFS_NAME_LEN) {
1287                         error = REF_ERR_NAME_TOO_LONG;
1288
1289                         if (cur + sizeof(*di) > total)
1290                                 break;
1291                         len = min_t(u32, total - cur - sizeof(*di),
1292                                     BTRFS_NAME_LEN);
1293                 } else {
1294                         len = name_len;
1295                         error = 0;
1296                 }
1297
1298                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1299
1300                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1301                     key->offset != btrfs_name_hash(namebuf, len)) {
1302                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1303                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1304                         key->objectid, key->offset, namebuf, len, filetype,
1305                         key->offset, btrfs_name_hash(namebuf, len));
1306                 }
1307
1308                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1309                         add_inode_backref(inode_cache, location.objectid,
1310                                           key->objectid, key->offset, namebuf,
1311                                           len, filetype, key->type, error);
1312                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1313                         add_inode_backref(root_cache, location.objectid,
1314                                           key->objectid, key->offset,
1315                                           namebuf, len, filetype,
1316                                           key->type, error);
1317                 } else {
1318                         fprintf(stderr,
1319                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1320                                 location.type, key->objectid, key->offset);
1321                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1322                                           key->objectid, key->offset, namebuf,
1323                                           len, filetype, key->type, error);
1324                 }
1325
1326                 len = sizeof(*di) + name_len + data_len;
1327                 di = (struct btrfs_dir_item *)((char *)di + len);
1328                 cur += len;
1329         }
1330         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1331                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1332
1333         return 0;
1334 }
1335
1336 static int process_inode_ref(struct extent_buffer *eb,
1337                              int slot, struct btrfs_key *key,
1338                              struct shared_node *active_node)
1339 {
1340         u32 total;
1341         u32 cur = 0;
1342         u32 len;
1343         u32 name_len;
1344         u64 index;
1345         int error;
1346         struct cache_tree *inode_cache;
1347         struct btrfs_inode_ref *ref;
1348         char namebuf[BTRFS_NAME_LEN];
1349
1350         inode_cache = &active_node->inode_cache;
1351
1352         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1353         total = btrfs_item_size_nr(eb, slot);
1354         while (cur < total) {
1355                 name_len = btrfs_inode_ref_name_len(eb, ref);
1356                 index = btrfs_inode_ref_index(eb, ref);
1357
1358                 /* inode_ref + namelen should not cross item boundary */
1359                 if (cur + sizeof(*ref) + name_len > total ||
1360                     name_len > BTRFS_NAME_LEN) {
1361                         if (total < cur + sizeof(*ref))
1362                                 break;
1363
1364                         /* Still try to read out the remaining part */
1365                         len = min_t(u32, total - cur - sizeof(*ref),
1366                                     BTRFS_NAME_LEN);
1367                         error = REF_ERR_NAME_TOO_LONG;
1368                 } else {
1369                         len = name_len;
1370                         error = 0;
1371                 }
1372
1373                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1374                 add_inode_backref(inode_cache, key->objectid, key->offset,
1375                                   index, namebuf, len, 0, key->type, error);
1376
1377                 len = sizeof(*ref) + name_len;
1378                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1379                 cur += len;
1380         }
1381         return 0;
1382 }
1383
1384 static int process_inode_extref(struct extent_buffer *eb,
1385                                 int slot, struct btrfs_key *key,
1386                                 struct shared_node *active_node)
1387 {
1388         u32 total;
1389         u32 cur = 0;
1390         u32 len;
1391         u32 name_len;
1392         u64 index;
1393         u64 parent;
1394         int error;
1395         struct cache_tree *inode_cache;
1396         struct btrfs_inode_extref *extref;
1397         char namebuf[BTRFS_NAME_LEN];
1398
1399         inode_cache = &active_node->inode_cache;
1400
1401         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1402         total = btrfs_item_size_nr(eb, slot);
1403         while (cur < total) {
1404                 name_len = btrfs_inode_extref_name_len(eb, extref);
1405                 index = btrfs_inode_extref_index(eb, extref);
1406                 parent = btrfs_inode_extref_parent(eb, extref);
1407                 if (name_len <= BTRFS_NAME_LEN) {
1408                         len = name_len;
1409                         error = 0;
1410                 } else {
1411                         len = BTRFS_NAME_LEN;
1412                         error = REF_ERR_NAME_TOO_LONG;
1413                 }
1414                 read_extent_buffer(eb, namebuf,
1415                                    (unsigned long)(extref + 1), len);
1416                 add_inode_backref(inode_cache, key->objectid, parent,
1417                                   index, namebuf, len, 0, key->type, error);
1418
1419                 len = sizeof(*extref) + name_len;
1420                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1421                 cur += len;
1422         }
1423         return 0;
1424
1425 }
1426
1427 static int count_csum_range(struct btrfs_root *root, u64 start,
1428                             u64 len, u64 *found)
1429 {
1430         struct btrfs_key key;
1431         struct btrfs_path path;
1432         struct extent_buffer *leaf;
1433         int ret;
1434         size_t size;
1435         *found = 0;
1436         u64 csum_end;
1437         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1438
1439         btrfs_init_path(&path);
1440
1441         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1442         key.offset = start;
1443         key.type = BTRFS_EXTENT_CSUM_KEY;
1444
1445         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1446                                 &key, &path, 0, 0);
1447         if (ret < 0)
1448                 goto out;
1449         if (ret > 0 && path.slots[0] > 0) {
1450                 leaf = path.nodes[0];
1451                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1452                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1453                     key.type == BTRFS_EXTENT_CSUM_KEY)
1454                         path.slots[0]--;
1455         }
1456
1457         while (len > 0) {
1458                 leaf = path.nodes[0];
1459                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1460                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1461                         if (ret > 0)
1462                                 break;
1463                         else if (ret < 0)
1464                                 goto out;
1465                         leaf = path.nodes[0];
1466                 }
1467
1468                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1469                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1470                     key.type != BTRFS_EXTENT_CSUM_KEY)
1471                         break;
1472
1473                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1474                 if (key.offset >= start + len)
1475                         break;
1476
1477                 if (key.offset > start)
1478                         start = key.offset;
1479
1480                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1481                 csum_end = key.offset + (size / csum_size) *
1482                            root->fs_info->sectorsize;
1483                 if (csum_end > start) {
1484                         size = min(csum_end - start, len);
1485                         len -= size;
1486                         start += size;
1487                         *found += size;
1488                 }
1489
1490                 path.slots[0]++;
1491         }
1492 out:
1493         btrfs_release_path(&path);
1494         if (ret < 0)
1495                 return ret;
1496         return 0;
1497 }
1498
1499 static int process_file_extent(struct btrfs_root *root,
1500                                 struct extent_buffer *eb,
1501                                 int slot, struct btrfs_key *key,
1502                                 struct shared_node *active_node)
1503 {
1504         struct inode_record *rec;
1505         struct btrfs_file_extent_item *fi;
1506         u64 num_bytes = 0;
1507         u64 disk_bytenr = 0;
1508         u64 extent_offset = 0;
1509         u64 mask = root->fs_info->sectorsize - 1;
1510         int extent_type;
1511         int ret;
1512
1513         rec = active_node->current;
1514         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1515         rec->found_file_extent = 1;
1516
1517         if (rec->extent_start == (u64)-1) {
1518                 rec->extent_start = key->offset;
1519                 rec->extent_end = key->offset;
1520         }
1521
1522         if (rec->extent_end > key->offset)
1523                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1524         else if (rec->extent_end < key->offset) {
1525                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1526                                            key->offset - rec->extent_end);
1527                 if (ret < 0)
1528                         return ret;
1529         }
1530
1531         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1532         extent_type = btrfs_file_extent_type(eb, fi);
1533
1534         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1535                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1536                 if (num_bytes == 0)
1537                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1538                 rec->found_size += num_bytes;
1539                 num_bytes = (num_bytes + mask) & ~mask;
1540         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1541                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1542                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1543                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1544                 extent_offset = btrfs_file_extent_offset(eb, fi);
1545                 if (num_bytes == 0 || (num_bytes & mask))
1546                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1547                 if (num_bytes + extent_offset >
1548                     btrfs_file_extent_ram_bytes(eb, fi))
1549                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1550                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1551                     (btrfs_file_extent_compression(eb, fi) ||
1552                      btrfs_file_extent_encryption(eb, fi) ||
1553                      btrfs_file_extent_other_encoding(eb, fi)))
1554                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1555                 if (disk_bytenr > 0)
1556                         rec->found_size += num_bytes;
1557         } else {
1558                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1559         }
1560         rec->extent_end = key->offset + num_bytes;
1561
1562         /*
1563          * The data reloc tree will copy full extents into its inode and then
1564          * copy the corresponding csums.  Because the extent it copied could be
1565          * a preallocated extent that hasn't been written to yet there may be no
1566          * csums to copy, ergo we won't have csums for our file extent.  This is
1567          * ok so just don't bother checking csums if the inode belongs to the
1568          * data reloc tree.
1569          */
1570         if (disk_bytenr > 0 &&
1571             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1572                 u64 found;
1573                 if (btrfs_file_extent_compression(eb, fi))
1574                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1575                 else
1576                         disk_bytenr += extent_offset;
1577
1578                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1579                 if (ret < 0)
1580                         return ret;
1581                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1582                         if (found > 0)
1583                                 rec->found_csum_item = 1;
1584                         if (found < num_bytes)
1585                                 rec->some_csum_missing = 1;
1586                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1587                         if (found > 0)
1588                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1589                 }
1590         }
1591         return 0;
1592 }
1593
1594 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1595                             struct walk_control *wc)
1596 {
1597         struct btrfs_key key;
1598         u32 nritems;
1599         int i;
1600         int ret = 0;
1601         struct cache_tree *inode_cache;
1602         struct shared_node *active_node;
1603
1604         if (wc->root_level == wc->active_node &&
1605             btrfs_root_refs(&root->root_item) == 0)
1606                 return 0;
1607
1608         active_node = wc->nodes[wc->active_node];
1609         inode_cache = &active_node->inode_cache;
1610         nritems = btrfs_header_nritems(eb);
1611         for (i = 0; i < nritems; i++) {
1612                 btrfs_item_key_to_cpu(eb, &key, i);
1613
1614                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1615                         continue;
1616                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1617                         continue;
1618
1619                 if (active_node->current == NULL ||
1620                     active_node->current->ino < key.objectid) {
1621                         if (active_node->current) {
1622                                 active_node->current->checked = 1;
1623                                 maybe_free_inode_rec(inode_cache,
1624                                                      active_node->current);
1625                         }
1626                         active_node->current = get_inode_rec(inode_cache,
1627                                                              key.objectid, 1);
1628                         BUG_ON(IS_ERR(active_node->current));
1629                 }
1630                 switch (key.type) {
1631                 case BTRFS_DIR_ITEM_KEY:
1632                 case BTRFS_DIR_INDEX_KEY:
1633                         ret = process_dir_item(eb, i, &key, active_node);
1634                         break;
1635                 case BTRFS_INODE_REF_KEY:
1636                         ret = process_inode_ref(eb, i, &key, active_node);
1637                         break;
1638                 case BTRFS_INODE_EXTREF_KEY:
1639                         ret = process_inode_extref(eb, i, &key, active_node);
1640                         break;
1641                 case BTRFS_INODE_ITEM_KEY:
1642                         ret = process_inode_item(eb, i, &key, active_node);
1643                         break;
1644                 case BTRFS_EXTENT_DATA_KEY:
1645                         ret = process_file_extent(root, eb, i, &key,
1646                                                   active_node);
1647                         break;
1648                 default:
1649                         break;
1650                 };
1651         }
1652         return ret;
1653 }
1654
1655 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1656                              struct extent_buffer *eb, struct node_refs *nrefs,
1657                              u64 level, int check_all);
1658 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1659                             unsigned int ext_ref);
1660
1661 /*
1662  * Returns >0  Found error, not fatal, should continue
1663  * Returns <0  Fatal error, must exit the whole check
1664  * Returns 0   No errors found
1665  */
1666 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1667                                struct node_refs *nrefs, int *level, int ext_ref)
1668 {
1669         struct extent_buffer *cur = path->nodes[0];
1670         struct btrfs_key key;
1671         u64 cur_bytenr;
1672         u32 nritems;
1673         u64 first_ino = 0;
1674         int root_level = btrfs_header_level(root->node);
1675         int i;
1676         int ret = 0; /* Final return value */
1677         int err = 0; /* Positive error bitmap */
1678
1679         cur_bytenr = cur->start;
1680
1681         /* skip to first inode item or the first inode number change */
1682         nritems = btrfs_header_nritems(cur);
1683         for (i = 0; i < nritems; i++) {
1684                 btrfs_item_key_to_cpu(cur, &key, i);
1685                 if (i == 0)
1686                         first_ino = key.objectid;
1687                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1688                     (first_ino && first_ino != key.objectid))
1689                         break;
1690         }
1691         if (i == nritems) {
1692                 path->slots[0] = nritems;
1693                 return 0;
1694         }
1695         path->slots[0] = i;
1696
1697 again:
1698         err |= check_inode_item(root, path, ext_ref);
1699
1700         /* modify cur since check_inode_item may change path */
1701         cur = path->nodes[0];
1702
1703         if (err & LAST_ITEM)
1704                 goto out;
1705
1706         /* still have inode items in thie leaf */
1707         if (cur->start == cur_bytenr)
1708                 goto again;
1709
1710         /*
1711          * we have switched to another leaf, above nodes may
1712          * have changed, here walk down the path, if a node
1713          * or leaf is shared, check whether we can skip this
1714          * node or leaf.
1715          */
1716         for (i = root_level; i >= 0; i--) {
1717                 if (path->nodes[i]->start == nrefs->bytenr[i])
1718                         continue;
1719
1720                 ret = update_nodes_refs(root, path->nodes[i]->start,
1721                                 path->nodes[i], nrefs, i, 0);
1722                 if (ret)
1723                         goto out;
1724
1725                 if (!nrefs->need_check[i]) {
1726                         *level += 1;
1727                         break;
1728                 }
1729         }
1730
1731         for (i = 0; i < *level; i++) {
1732                 free_extent_buffer(path->nodes[i]);
1733                 path->nodes[i] = NULL;
1734         }
1735 out:
1736         err &= ~LAST_ITEM;
1737         if (err && !ret)
1738                 ret = err;
1739         return ret;
1740 }
1741
1742 static void reada_walk_down(struct btrfs_root *root,
1743                             struct extent_buffer *node, int slot)
1744 {
1745         struct btrfs_fs_info *fs_info = root->fs_info;
1746         u64 bytenr;
1747         u64 ptr_gen;
1748         u32 nritems;
1749         int i;
1750         int level;
1751
1752         level = btrfs_header_level(node);
1753         if (level != 1)
1754                 return;
1755
1756         nritems = btrfs_header_nritems(node);
1757         for (i = slot; i < nritems; i++) {
1758                 bytenr = btrfs_node_blockptr(node, i);
1759                 ptr_gen = btrfs_node_ptr_generation(node, i);
1760                 readahead_tree_block(fs_info, bytenr, ptr_gen);
1761         }
1762 }
1763
1764 /*
1765  * Check the child node/leaf by the following condition:
1766  * 1. the first item key of the node/leaf should be the same with the one
1767  *    in parent.
1768  * 2. block in parent node should match the child node/leaf.
1769  * 3. generation of parent node and child's header should be consistent.
1770  *
1771  * Or the child node/leaf pointed by the key in parent is not valid.
1772  *
1773  * We hope to check leaf owner too, but since subvol may share leaves,
1774  * which makes leaf owner check not so strong, key check should be
1775  * sufficient enough for that case.
1776  */
1777 static int check_child_node(struct extent_buffer *parent, int slot,
1778                             struct extent_buffer *child)
1779 {
1780         struct btrfs_key parent_key;
1781         struct btrfs_key child_key;
1782         int ret = 0;
1783
1784         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1785         if (btrfs_header_level(child) == 0)
1786                 btrfs_item_key_to_cpu(child, &child_key, 0);
1787         else
1788                 btrfs_node_key_to_cpu(child, &child_key, 0);
1789
1790         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1791                 ret = -EINVAL;
1792                 fprintf(stderr,
1793                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1794                         parent_key.objectid, parent_key.type, parent_key.offset,
1795                         child_key.objectid, child_key.type, child_key.offset);
1796         }
1797         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1798                 ret = -EINVAL;
1799                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1800                         btrfs_node_blockptr(parent, slot),
1801                         btrfs_header_bytenr(child));
1802         }
1803         if (btrfs_node_ptr_generation(parent, slot) !=
1804             btrfs_header_generation(child)) {
1805                 ret = -EINVAL;
1806                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1807                         btrfs_header_generation(child),
1808                         btrfs_node_ptr_generation(parent, slot));
1809         }
1810         return ret;
1811 }
1812
1813 /*
1814  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1815  * in every fs or file tree check. Here we find its all root ids, and only check
1816  * it in the fs or file tree which has the smallest root id.
1817  */
1818 static int need_check(struct btrfs_root *root, struct ulist *roots)
1819 {
1820         struct rb_node *node;
1821         struct ulist_node *u;
1822
1823         /*
1824          * @roots can be empty if it belongs to tree reloc tree
1825          * In that case, we should always check the leaf, as we can't use
1826          * the tree owner to ensure some other root will check it.
1827          */
1828         if (roots->nnodes == 1 || roots->nnodes == 0)
1829                 return 1;
1830
1831         node = rb_first(&roots->root);
1832         u = rb_entry(node, struct ulist_node, rb_node);
1833         /*
1834          * current root id is not smallest, we skip it and let it be checked
1835          * in the fs or file tree who hash the smallest root id.
1836          */
1837         if (root->objectid != u->val)
1838                 return 0;
1839
1840         return 1;
1841 }
1842
1843 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1844                                u64 *flags_ret)
1845 {
1846         struct btrfs_root *extent_root = root->fs_info->extent_root;
1847         struct btrfs_root_item *ri = &root->root_item;
1848         struct btrfs_extent_inline_ref *iref;
1849         struct btrfs_extent_item *ei;
1850         struct btrfs_key key;
1851         struct btrfs_path *path = NULL;
1852         unsigned long ptr;
1853         unsigned long end;
1854         u64 flags;
1855         u64 owner = 0;
1856         u64 offset;
1857         int slot;
1858         int type;
1859         int ret = 0;
1860
1861         /*
1862          * Except file/reloc tree, we can not have FULL BACKREF MODE
1863          */
1864         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1865                 goto normal;
1866
1867         /* root node */
1868         if (eb->start == btrfs_root_bytenr(ri))
1869                 goto normal;
1870
1871         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1872                 goto full_backref;
1873
1874         owner = btrfs_header_owner(eb);
1875         if (owner == root->objectid)
1876                 goto normal;
1877
1878         path = btrfs_alloc_path();
1879         if (!path)
1880                 return -ENOMEM;
1881
1882         key.objectid = btrfs_header_bytenr(eb);
1883         key.type = (u8)-1;
1884         key.offset = (u64)-1;
1885
1886         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1887         if (ret <= 0) {
1888                 ret = -EIO;
1889                 goto out;
1890         }
1891
1892         if (ret > 0) {
1893                 ret = btrfs_previous_extent_item(extent_root, path,
1894                                                  key.objectid);
1895                 if (ret)
1896                         goto full_backref;
1897
1898         }
1899         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1900
1901         eb = path->nodes[0];
1902         slot = path->slots[0];
1903         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1904
1905         flags = btrfs_extent_flags(eb, ei);
1906         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1907                 goto full_backref;
1908
1909         ptr = (unsigned long)(ei + 1);
1910         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
1911
1912         if (key.type == BTRFS_EXTENT_ITEM_KEY)
1913                 ptr += sizeof(struct btrfs_tree_block_info);
1914
1915 next:
1916         /* Reached extent item ends normally */
1917         if (ptr == end)
1918                 goto full_backref;
1919
1920         /* Beyond extent item end, wrong item size */
1921         if (ptr > end) {
1922                 error("extent item at bytenr %llu slot %d has wrong size",
1923                         eb->start, slot);
1924                 goto full_backref;
1925         }
1926
1927         iref = (struct btrfs_extent_inline_ref *)ptr;
1928         offset = btrfs_extent_inline_ref_offset(eb, iref);
1929         type = btrfs_extent_inline_ref_type(eb, iref);
1930
1931         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1932                 goto normal;
1933         ptr += btrfs_extent_inline_ref_size(type);
1934         goto next;
1935
1936 normal:
1937         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
1938         goto out;
1939
1940 full_backref:
1941         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1942 out:
1943         btrfs_free_path(path);
1944         return ret;
1945 }
1946
1947 /*
1948  * for a tree node or leaf, we record its reference count, so later if we still
1949  * process this node or leaf, don't need to compute its reference count again.
1950  *
1951  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
1952  */
1953 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1954                              struct extent_buffer *eb, struct node_refs *nrefs,
1955                              u64 level, int check_all)
1956 {
1957         struct ulist *roots;
1958         u64 refs = 0;
1959         u64 flags = 0;
1960         int root_level = btrfs_header_level(root->node);
1961         int check;
1962         int ret;
1963
1964         if (nrefs->bytenr[level] == bytenr)
1965                 return 0;
1966
1967         if (bytenr != (u64)-1) {
1968                 /* the return value of this function seems a mistake */
1969                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
1970                                        level, 1, &refs, &flags);
1971                 /* temporary fix */
1972                 if (ret < 0 && !check_all)
1973                         return ret;
1974
1975                 nrefs->bytenr[level] = bytenr;
1976                 nrefs->refs[level] = refs;
1977                 nrefs->full_backref[level] = 0;
1978                 nrefs->checked[level] = 0;
1979
1980                 if (refs > 1) {
1981                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
1982                                                    0, &roots);
1983                         if (ret)
1984                                 return -EIO;
1985
1986                         check = need_check(root, roots);
1987                         ulist_free(roots);
1988                         nrefs->need_check[level] = check;
1989                 } else {
1990                         if (!check_all) {
1991                                 nrefs->need_check[level] = 1;
1992                         } else {
1993                                 if (level == root_level) {
1994                                         nrefs->need_check[level] = 1;
1995                                 } else {
1996                                         /*
1997                                          * The node refs may have not been
1998                                          * updated if upper needs checking (the
1999                                          * lowest root_objectid) the node can
2000                                          * be checked.
2001                                          */
2002                                         nrefs->need_check[level] =
2003                                                 nrefs->need_check[level + 1];
2004                                 }
2005                         }
2006                 }
2007         }
2008
2009         if (check_all && eb) {
2010                 calc_extent_flag_v2(root, eb, &flags);
2011                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2012                         nrefs->full_backref[level] = 1;
2013         }
2014
2015         return 0;
2016 }
2017
2018 /*
2019  * @level           if @level == -1 means extent data item
2020  *                  else normal treeblocl.
2021  */
2022 static int should_check_extent_strictly(struct btrfs_root *root,
2023                                         struct node_refs *nrefs, int level)
2024 {
2025         int root_level = btrfs_header_level(root->node);
2026
2027         if (level > root_level || level < -1)
2028                 return 1;
2029         if (level == root_level)
2030                 return 1;
2031         /*
2032          * if the upper node is marked full backref, it should contain shared
2033          * backref of the parent (except owner == root->objectid).
2034          */
2035         while (++level <= root_level)
2036                 if (nrefs->refs[level] > 1)
2037                         return 0;
2038
2039         return 1;
2040 }
2041
2042 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2043                           struct walk_control *wc, int *level,
2044                           struct node_refs *nrefs)
2045 {
2046         enum btrfs_tree_block_status status;
2047         u64 bytenr;
2048         u64 ptr_gen;
2049         struct btrfs_fs_info *fs_info = root->fs_info;
2050         struct extent_buffer *next;
2051         struct extent_buffer *cur;
2052         int ret, err = 0;
2053         u64 refs;
2054
2055         WARN_ON(*level < 0);
2056         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2057
2058         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2059                 refs = nrefs->refs[*level];
2060                 ret = 0;
2061         } else {
2062                 ret = btrfs_lookup_extent_info(NULL, root,
2063                                        path->nodes[*level]->start,
2064                                        *level, 1, &refs, NULL);
2065                 if (ret < 0) {
2066                         err = ret;
2067                         goto out;
2068                 }
2069                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2070                 nrefs->refs[*level] = refs;
2071         }
2072
2073         if (refs > 1) {
2074                 ret = enter_shared_node(root, path->nodes[*level]->start,
2075                                         refs, wc, *level);
2076                 if (ret > 0) {
2077                         err = ret;
2078                         goto out;
2079                 }
2080         }
2081
2082         while (*level >= 0) {
2083                 WARN_ON(*level < 0);
2084                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2085                 cur = path->nodes[*level];
2086
2087                 if (btrfs_header_level(cur) != *level)
2088                         WARN_ON(1);
2089
2090                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2091                         break;
2092                 if (*level == 0) {
2093                         ret = process_one_leaf(root, cur, wc);
2094                         if (ret < 0)
2095                                 err = ret;
2096                         break;
2097                 }
2098                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2099                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2100
2101                 if (bytenr == nrefs->bytenr[*level - 1]) {
2102                         refs = nrefs->refs[*level - 1];
2103                 } else {
2104                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2105                                         *level - 1, 1, &refs, NULL);
2106                         if (ret < 0) {
2107                                 refs = 0;
2108                         } else {
2109                                 nrefs->bytenr[*level - 1] = bytenr;
2110                                 nrefs->refs[*level - 1] = refs;
2111                         }
2112                 }
2113
2114                 if (refs > 1) {
2115                         ret = enter_shared_node(root, bytenr, refs,
2116                                                 wc, *level - 1);
2117                         if (ret > 0) {
2118                                 path->slots[*level]++;
2119                                 continue;
2120                         }
2121                 }
2122
2123                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2124                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2125                         free_extent_buffer(next);
2126                         reada_walk_down(root, cur, path->slots[*level]);
2127                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2128                         if (!extent_buffer_uptodate(next)) {
2129                                 struct btrfs_key node_key;
2130
2131                                 btrfs_node_key_to_cpu(path->nodes[*level],
2132                                                       &node_key,
2133                                                       path->slots[*level]);
2134                                 btrfs_add_corrupt_extent_record(root->fs_info,
2135                                                 &node_key,
2136                                                 path->nodes[*level]->start,
2137                                                 root->fs_info->nodesize,
2138                                                 *level);
2139                                 err = -EIO;
2140                                 goto out;
2141                         }
2142                 }
2143
2144                 ret = check_child_node(cur, path->slots[*level], next);
2145                 if (ret) {
2146                         free_extent_buffer(next);
2147                         err = ret;
2148                         goto out;
2149                 }
2150
2151                 if (btrfs_is_leaf(next))
2152                         status = btrfs_check_leaf(root, NULL, next);
2153                 else
2154                         status = btrfs_check_node(root, NULL, next);
2155                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2156                         free_extent_buffer(next);
2157                         err = -EIO;
2158                         goto out;
2159                 }
2160
2161                 *level = *level - 1;
2162                 free_extent_buffer(path->nodes[*level]);
2163                 path->nodes[*level] = next;
2164                 path->slots[*level] = 0;
2165         }
2166 out:
2167         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2168         return err;
2169 }
2170
2171 static int fs_root_objectid(u64 objectid);
2172
2173 /*
2174  * Update global fs information.
2175  */
2176 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2177                          int level)
2178 {
2179         u32 free_nrs;
2180         struct extent_buffer *eb = path->nodes[level];
2181
2182         total_btree_bytes += eb->len;
2183         if (fs_root_objectid(root->objectid))
2184                 total_fs_tree_bytes += eb->len;
2185         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2186                 total_extent_tree_bytes += eb->len;
2187
2188         if (level == 0) {
2189                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2190         } else {
2191                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2192                             btrfs_header_nritems(eb));
2193                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2194         }
2195 }
2196
2197 /*
2198  * This function only handles BACKREF_MISSING,
2199  * If corresponding extent item exists, increase the ref, else insert an extent
2200  * item and backref.
2201  *
2202  * Returns error bits after repair.
2203  */
2204 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2205                                  struct btrfs_root *root,
2206                                  struct extent_buffer *node,
2207                                  struct node_refs *nrefs, int level, int err)
2208 {
2209         struct btrfs_fs_info *fs_info = root->fs_info;
2210         struct btrfs_root *extent_root = fs_info->extent_root;
2211         struct btrfs_path path;
2212         struct btrfs_extent_item *ei;
2213         struct btrfs_tree_block_info *bi;
2214         struct btrfs_key key;
2215         struct extent_buffer *eb;
2216         u32 size = sizeof(*ei);
2217         u32 node_size = root->fs_info->nodesize;
2218         int insert_extent = 0;
2219         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2220         int root_level = btrfs_header_level(root->node);
2221         int generation;
2222         int ret;
2223         u64 owner;
2224         u64 bytenr;
2225         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2226         u64 parent = 0;
2227
2228         if ((err & BACKREF_MISSING) == 0)
2229                 return err;
2230
2231         WARN_ON(level > BTRFS_MAX_LEVEL);
2232         WARN_ON(level < 0);
2233
2234         btrfs_init_path(&path);
2235         bytenr = btrfs_header_bytenr(node);
2236         owner = btrfs_header_owner(node);
2237         generation = btrfs_header_generation(node);
2238
2239         key.objectid = bytenr;
2240         key.type = (u8)-1;
2241         key.offset = (u64)-1;
2242
2243         /* Search for the extent item */
2244         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2245         if (ret <= 0) {
2246                 ret = -EIO;
2247                 goto out;
2248         }
2249
2250         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2251         if (ret)
2252                 insert_extent = 1;
2253
2254         /* calculate if the extent item flag is full backref or not */
2255         if (nrefs->full_backref[level] != 0)
2256                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2257
2258         /* insert an extent item */
2259         if (insert_extent) {
2260                 struct btrfs_disk_key copy_key;
2261
2262                 generation = btrfs_header_generation(node);
2263
2264                 if (level < root_level && nrefs->full_backref[level + 1] &&
2265                     owner != root->objectid) {
2266                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2267                 }
2268
2269                 key.objectid = bytenr;
2270                 if (!skinny_metadata) {
2271                         key.type = BTRFS_EXTENT_ITEM_KEY;
2272                         key.offset = node_size;
2273                         size += sizeof(*bi);
2274                 } else {
2275                         key.type = BTRFS_METADATA_ITEM_KEY;
2276                         key.offset = level;
2277                 }
2278
2279                 btrfs_release_path(&path);
2280                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2281                                               size);
2282                 if (ret)
2283                         goto out;
2284
2285                 eb = path.nodes[0];
2286                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2287
2288                 btrfs_set_extent_refs(eb, ei, 0);
2289                 btrfs_set_extent_generation(eb, ei, generation);
2290                 btrfs_set_extent_flags(eb, ei, flags);
2291
2292                 if (!skinny_metadata) {
2293                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2294                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2295                                              sizeof(*bi));
2296                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2297                         btrfs_set_disk_key_type(&copy_key, 0);
2298                         btrfs_set_disk_key_offset(&copy_key, 0);
2299
2300                         btrfs_set_tree_block_level(eb, bi, level);
2301                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2302                 }
2303                 btrfs_mark_buffer_dirty(eb);
2304                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2305                 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2306
2307                 nrefs->refs[level] = 0;
2308                 nrefs->full_backref[level] =
2309                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2310                 btrfs_release_path(&path);
2311         }
2312
2313         if (level < root_level && nrefs->full_backref[level + 1] &&
2314             owner != root->objectid)
2315                 parent = nrefs->bytenr[level + 1];
2316
2317         /* increase the ref */
2318         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2319                         parent, root->objectid, level, 0);
2320
2321         nrefs->refs[level]++;
2322 out:
2323         btrfs_release_path(&path);
2324         if (ret) {
2325                 error(
2326         "failed to repair tree block ref start %llu root %llu due to %s",
2327                       bytenr, root->objectid, strerror(-ret));
2328         } else {
2329                 printf("Added one tree block ref start %llu %s %llu\n",
2330                        bytenr, parent ? "parent" : "root",
2331                        parent ? parent : root->objectid);
2332                 err &= ~BACKREF_MISSING;
2333         }
2334
2335         return err;
2336 }
2337
2338 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2339                             unsigned int ext_ref);
2340 static int check_tree_block_ref(struct btrfs_root *root,
2341                                 struct extent_buffer *eb, u64 bytenr,
2342                                 int level, u64 owner, struct node_refs *nrefs);
2343 static int check_leaf_items(struct btrfs_trans_handle *trans,
2344                             struct btrfs_root *root, struct btrfs_path *path,
2345                             struct node_refs *nrefs, int account_bytes);
2346
2347 /*
2348  * @trans      just for lowmem repair mode
2349  * @check all  if not 0 then check all tree block backrefs and items
2350  *             0 then just check relationship of items in fs tree(s)
2351  *
2352  * Returns >0  Found error, should continue
2353  * Returns <0  Fatal error, must exit the whole check
2354  * Returns 0   No errors found
2355  */
2356 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2357                              struct btrfs_root *root, struct btrfs_path *path,
2358                              int *level, struct node_refs *nrefs, int ext_ref,
2359                              int check_all)
2360
2361 {
2362         enum btrfs_tree_block_status status;
2363         u64 bytenr;
2364         u64 ptr_gen;
2365         struct btrfs_fs_info *fs_info = root->fs_info;
2366         struct extent_buffer *next;
2367         struct extent_buffer *cur;
2368         int ret;
2369         int err = 0;
2370         int check;
2371         int account_file_data = 0;
2372
2373         WARN_ON(*level < 0);
2374         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2375
2376         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2377                                 path->nodes[*level], nrefs, *level, check_all);
2378         if (ret < 0)
2379                 return ret;
2380
2381         while (*level >= 0) {
2382                 WARN_ON(*level < 0);
2383                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2384                 cur = path->nodes[*level];
2385                 bytenr = btrfs_header_bytenr(cur);
2386                 check = nrefs->need_check[*level];
2387
2388                 if (btrfs_header_level(cur) != *level)
2389                         WARN_ON(1);
2390                /*
2391                 * Update bytes accounting and check tree block ref
2392                 * NOTE: Doing accounting and check before checking nritems
2393                 * is necessary because of empty node/leaf.
2394                 */
2395                 if ((check_all && !nrefs->checked[*level]) ||
2396                     (!check_all && nrefs->need_check[*level])) {
2397                         ret = check_tree_block_ref(root, cur,
2398                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2399                            btrfs_header_owner(cur), nrefs);
2400
2401                         if (repair && ret)
2402                                 ret = repair_tree_block_ref(trans, root,
2403                                     path->nodes[*level], nrefs, *level, ret);
2404                         err |= ret;
2405
2406                         if (check_all && nrefs->need_check[*level] &&
2407                                 nrefs->refs[*level]) {
2408                                 account_bytes(root, path, *level);
2409                                 account_file_data = 1;
2410                         }
2411                         nrefs->checked[*level] = 1;
2412                 }
2413
2414                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2415                         break;
2416
2417                 /* Don't forgot to check leaf/node validation */
2418                 if (*level == 0) {
2419                         /* skip duplicate check */
2420                         if (check || !check_all) {
2421                                 ret = btrfs_check_leaf(root, NULL, cur);
2422                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2423                                         err |= -EIO;
2424                                         break;
2425                                 }
2426                         }
2427
2428                         ret = 0;
2429                         if (!check_all)
2430                                 ret = process_one_leaf_v2(root, path, nrefs,
2431                                                           level, ext_ref);
2432                         else
2433                                 ret = check_leaf_items(trans, root, path,
2434                                                nrefs, account_file_data);
2435                         err |= ret;
2436                         break;
2437                 } else {
2438                         if (check || !check_all) {
2439                                 ret = btrfs_check_node(root, NULL, cur);
2440                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2441                                         err |= -EIO;
2442                                         break;
2443                                 }
2444                         }
2445                 }
2446
2447                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2448                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2449
2450                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2451                                         check_all);
2452                 if (ret < 0)
2453                         break;
2454                 /*
2455                  * check all trees in check_chunks_and_extent_v2
2456                  * check shared node once in check_fs_roots
2457                  */
2458                 if (!check_all && !nrefs->need_check[*level - 1]) {
2459                         path->slots[*level]++;
2460                         continue;
2461                 }
2462
2463                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2464                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2465                         free_extent_buffer(next);
2466                         reada_walk_down(root, cur, path->slots[*level]);
2467                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2468                         if (!extent_buffer_uptodate(next)) {
2469                                 struct btrfs_key node_key;
2470
2471                                 btrfs_node_key_to_cpu(path->nodes[*level],
2472                                                       &node_key,
2473                                                       path->slots[*level]);
2474                                 btrfs_add_corrupt_extent_record(fs_info,
2475                                         &node_key, path->nodes[*level]->start,
2476                                         fs_info->nodesize, *level);
2477                                 err |= -EIO;
2478                                 break;
2479                         }
2480                 }
2481
2482                 ret = check_child_node(cur, path->slots[*level], next);
2483                 err |= ret;
2484                 if (ret < 0) 
2485                         break;
2486
2487                 if (btrfs_is_leaf(next))
2488                         status = btrfs_check_leaf(root, NULL, next);
2489                 else
2490                         status = btrfs_check_node(root, NULL, next);
2491                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2492                         free_extent_buffer(next);
2493                         err |= -EIO;
2494                         break;
2495                 }
2496
2497                 *level = *level - 1;
2498                 free_extent_buffer(path->nodes[*level]);
2499                 path->nodes[*level] = next;
2500                 path->slots[*level] = 0;
2501                 account_file_data = 0;
2502
2503                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2504         }
2505         return err;
2506 }
2507
2508 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2509                         struct walk_control *wc, int *level)
2510 {
2511         int i;
2512         struct extent_buffer *leaf;
2513
2514         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2515                 leaf = path->nodes[i];
2516                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2517                         path->slots[i]++;
2518                         *level = i;
2519                         return 0;
2520                 } else {
2521                         free_extent_buffer(path->nodes[*level]);
2522                         path->nodes[*level] = NULL;
2523                         BUG_ON(*level > wc->active_node);
2524                         if (*level == wc->active_node)
2525                                 leave_shared_node(root, wc, *level);
2526                         *level = i + 1;
2527                 }
2528         }
2529         return 1;
2530 }
2531
2532 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2533                            int *level)
2534 {
2535         int i;
2536         struct extent_buffer *leaf;
2537
2538         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2539                 leaf = path->nodes[i];
2540                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2541                         path->slots[i]++;
2542                         *level = i;
2543                         return 0;
2544                 } else {
2545                         free_extent_buffer(path->nodes[*level]);
2546                         path->nodes[*level] = NULL;
2547                         *level = i + 1;
2548                 }
2549         }
2550         return 1;
2551 }
2552
2553 static int check_root_dir(struct inode_record *rec)
2554 {
2555         struct inode_backref *backref;
2556         int ret = -1;
2557
2558         if (!rec->found_inode_item || rec->errors)
2559                 goto out;
2560         if (rec->nlink != 1 || rec->found_link != 0)
2561                 goto out;
2562         if (list_empty(&rec->backrefs))
2563                 goto out;
2564         backref = to_inode_backref(rec->backrefs.next);
2565         if (!backref->found_inode_ref)
2566                 goto out;
2567         if (backref->index != 0 || backref->namelen != 2 ||
2568             memcmp(backref->name, "..", 2))
2569                 goto out;
2570         if (backref->found_dir_index || backref->found_dir_item)
2571                 goto out;
2572         ret = 0;
2573 out:
2574         return ret;
2575 }
2576
2577 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2578                               struct btrfs_root *root, struct btrfs_path *path,
2579                               struct inode_record *rec)
2580 {
2581         struct btrfs_inode_item *ei;
2582         struct btrfs_key key;
2583         int ret;
2584
2585         key.objectid = rec->ino;
2586         key.type = BTRFS_INODE_ITEM_KEY;
2587         key.offset = (u64)-1;
2588
2589         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2590         if (ret < 0)
2591                 goto out;
2592         if (ret) {
2593                 if (!path->slots[0]) {
2594                         ret = -ENOENT;
2595                         goto out;
2596                 }
2597                 path->slots[0]--;
2598                 ret = 0;
2599         }
2600         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2601         if (key.objectid != rec->ino) {
2602                 ret = -ENOENT;
2603                 goto out;
2604         }
2605
2606         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2607                             struct btrfs_inode_item);
2608         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2609         btrfs_mark_buffer_dirty(path->nodes[0]);
2610         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2611         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2612                root->root_key.objectid);
2613 out:
2614         btrfs_release_path(path);
2615         return ret;
2616 }
2617
2618 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2619                                     struct btrfs_root *root,
2620                                     struct btrfs_path *path,
2621                                     struct inode_record *rec)
2622 {
2623         int ret;
2624
2625         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2626         btrfs_release_path(path);
2627         if (!ret)
2628                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2629         return ret;
2630 }
2631
2632 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2633                                struct btrfs_root *root,
2634                                struct btrfs_path *path,
2635                                struct inode_record *rec)
2636 {
2637         struct btrfs_inode_item *ei;
2638         struct btrfs_key key;
2639         int ret = 0;
2640
2641         key.objectid = rec->ino;
2642         key.type = BTRFS_INODE_ITEM_KEY;
2643         key.offset = 0;
2644
2645         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2646         if (ret) {
2647                 if (ret > 0)
2648                         ret = -ENOENT;
2649                 goto out;
2650         }
2651
2652         /* Since ret == 0, no need to check anything */
2653         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2654                             struct btrfs_inode_item);
2655         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2656         btrfs_mark_buffer_dirty(path->nodes[0]);
2657         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2658         printf("reset nbytes for ino %llu root %llu\n",
2659                rec->ino, root->root_key.objectid);
2660 out:
2661         btrfs_release_path(path);
2662         return ret;
2663 }
2664
2665 static int add_missing_dir_index(struct btrfs_root *root,
2666                                  struct cache_tree *inode_cache,
2667                                  struct inode_record *rec,
2668                                  struct inode_backref *backref)
2669 {
2670         struct btrfs_path path;
2671         struct btrfs_trans_handle *trans;
2672         struct btrfs_dir_item *dir_item;
2673         struct extent_buffer *leaf;
2674         struct btrfs_key key;
2675         struct btrfs_disk_key disk_key;
2676         struct inode_record *dir_rec;
2677         unsigned long name_ptr;
2678         u32 data_size = sizeof(*dir_item) + backref->namelen;
2679         int ret;
2680
2681         trans = btrfs_start_transaction(root, 1);
2682         if (IS_ERR(trans))
2683                 return PTR_ERR(trans);
2684
2685         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2686                 (unsigned long long)rec->ino);
2687
2688         btrfs_init_path(&path);
2689         key.objectid = backref->dir;
2690         key.type = BTRFS_DIR_INDEX_KEY;
2691         key.offset = backref->index;
2692         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2693         BUG_ON(ret);
2694
2695         leaf = path.nodes[0];
2696         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2697
2698         disk_key.objectid = cpu_to_le64(rec->ino);
2699         disk_key.type = BTRFS_INODE_ITEM_KEY;
2700         disk_key.offset = 0;
2701
2702         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2703         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2704         btrfs_set_dir_data_len(leaf, dir_item, 0);
2705         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2706         name_ptr = (unsigned long)(dir_item + 1);
2707         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2708         btrfs_mark_buffer_dirty(leaf);
2709         btrfs_release_path(&path);
2710         btrfs_commit_transaction(trans, root);
2711
2712         backref->found_dir_index = 1;
2713         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2714         BUG_ON(IS_ERR(dir_rec));
2715         if (!dir_rec)
2716                 return 0;
2717         dir_rec->found_size += backref->namelen;
2718         if (dir_rec->found_size == dir_rec->isize &&
2719             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2720                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2721         if (dir_rec->found_size != dir_rec->isize)
2722                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2723
2724         return 0;
2725 }
2726
2727 static int delete_dir_index(struct btrfs_root *root,
2728                             struct inode_backref *backref)
2729 {
2730         struct btrfs_trans_handle *trans;
2731         struct btrfs_dir_item *di;
2732         struct btrfs_path path;
2733         int ret = 0;
2734
2735         trans = btrfs_start_transaction(root, 1);
2736         if (IS_ERR(trans))
2737                 return PTR_ERR(trans);
2738
2739         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2740                 (unsigned long long)backref->dir,
2741                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2742                 (unsigned long long)root->objectid);
2743
2744         btrfs_init_path(&path);
2745         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2746                                     backref->name, backref->namelen,
2747                                     backref->index, -1);
2748         if (IS_ERR(di)) {
2749                 ret = PTR_ERR(di);
2750                 btrfs_release_path(&path);
2751                 btrfs_commit_transaction(trans, root);
2752                 if (ret == -ENOENT)
2753                         return 0;
2754                 return ret;
2755         }
2756
2757         if (!di)
2758                 ret = btrfs_del_item(trans, root, &path);
2759         else
2760                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2761         BUG_ON(ret);
2762         btrfs_release_path(&path);
2763         btrfs_commit_transaction(trans, root);
2764         return ret;
2765 }
2766
2767 static int __create_inode_item(struct btrfs_trans_handle *trans,
2768                                struct btrfs_root *root, u64 ino, u64 size,
2769                                u64 nbytes, u64 nlink, u32 mode)
2770 {
2771         struct btrfs_inode_item ii;
2772         time_t now = time(NULL);
2773         int ret;
2774
2775         btrfs_set_stack_inode_size(&ii, size);
2776         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2777         btrfs_set_stack_inode_nlink(&ii, nlink);
2778         btrfs_set_stack_inode_mode(&ii, mode);
2779         btrfs_set_stack_inode_generation(&ii, trans->transid);
2780         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2781         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2782         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2783         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2784         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2785         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2786         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2787
2788         ret = btrfs_insert_inode(trans, root, ino, &ii);
2789         ASSERT(!ret);
2790
2791         warning("root %llu inode %llu recreating inode item, this may "
2792                 "be incomplete, please check permissions and content after "
2793                 "the fsck completes.\n", (unsigned long long)root->objectid,
2794                 (unsigned long long)ino);
2795
2796         return 0;
2797 }
2798
2799 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2800                                     struct btrfs_root *root, u64 ino,
2801                                     u8 filetype)
2802 {
2803         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2804
2805         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2806 }
2807
2808 static int create_inode_item(struct btrfs_root *root,
2809                              struct inode_record *rec, int root_dir)
2810 {
2811         struct btrfs_trans_handle *trans;
2812         u64 nlink = 0;
2813         u32 mode = 0;
2814         u64 size = 0;
2815         int ret;
2816
2817         trans = btrfs_start_transaction(root, 1);
2818         if (IS_ERR(trans)) {
2819                 ret = PTR_ERR(trans);
2820                 return ret;
2821         }
2822
2823         nlink = root_dir ? 1 : rec->found_link;
2824         if (rec->found_dir_item) {
2825                 if (rec->found_file_extent)
2826                         fprintf(stderr, "root %llu inode %llu has both a dir "
2827                                 "item and extents, unsure if it is a dir or a "
2828                                 "regular file so setting it as a directory\n",
2829                                 (unsigned long long)root->objectid,
2830                                 (unsigned long long)rec->ino);
2831                 mode = S_IFDIR | 0755;
2832                 size = rec->found_size;
2833         } else if (!rec->found_dir_item) {
2834                 size = rec->extent_end;
2835                 mode =  S_IFREG | 0755;
2836         }
2837
2838         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2839                                   nlink, mode);
2840         btrfs_commit_transaction(trans, root);
2841         return 0;
2842 }
2843
2844 static int repair_inode_backrefs(struct btrfs_root *root,
2845                                  struct inode_record *rec,
2846                                  struct cache_tree *inode_cache,
2847                                  int delete)
2848 {
2849         struct inode_backref *tmp, *backref;
2850         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2851         int ret = 0;
2852         int repaired = 0;
2853
2854         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2855                 if (!delete && rec->ino == root_dirid) {
2856                         if (!rec->found_inode_item) {
2857                                 ret = create_inode_item(root, rec, 1);
2858                                 if (ret)
2859                                         break;
2860                                 repaired++;
2861                         }
2862                 }
2863
2864                 /* Index 0 for root dir's are special, don't mess with it */
2865                 if (rec->ino == root_dirid && backref->index == 0)
2866                         continue;
2867
2868                 if (delete &&
2869                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2870                      (backref->found_dir_index && backref->found_inode_ref &&
2871                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2872                         ret = delete_dir_index(root, backref);
2873                         if (ret)
2874                                 break;
2875                         repaired++;
2876                         list_del(&backref->list);
2877                         free(backref);
2878                         continue;
2879                 }
2880
2881                 if (!delete && !backref->found_dir_index &&
2882                     backref->found_dir_item && backref->found_inode_ref) {
2883                         ret = add_missing_dir_index(root, inode_cache, rec,
2884                                                     backref);
2885                         if (ret)
2886                                 break;
2887                         repaired++;
2888                         if (backref->found_dir_item &&
2889                             backref->found_dir_index) {
2890                                 if (!backref->errors &&
2891                                     backref->found_inode_ref) {
2892                                         list_del(&backref->list);
2893                                         free(backref);
2894                                         continue;
2895                                 }
2896                         }
2897                 }
2898
2899                 if (!delete && (!backref->found_dir_index &&
2900                                 !backref->found_dir_item &&
2901                                 backref->found_inode_ref)) {
2902                         struct btrfs_trans_handle *trans;
2903                         struct btrfs_key location;
2904
2905                         ret = check_dir_conflict(root, backref->name,
2906                                                  backref->namelen,
2907                                                  backref->dir,
2908                                                  backref->index);
2909                         if (ret) {
2910                                 /*
2911                                  * let nlink fixing routine to handle it,
2912                                  * which can do it better.
2913                                  */
2914                                 ret = 0;
2915                                 break;
2916                         }
2917                         location.objectid = rec->ino;
2918                         location.type = BTRFS_INODE_ITEM_KEY;
2919                         location.offset = 0;
2920
2921                         trans = btrfs_start_transaction(root, 1);
2922                         if (IS_ERR(trans)) {
2923                                 ret = PTR_ERR(trans);
2924                                 break;
2925                         }
2926                         fprintf(stderr, "adding missing dir index/item pair "
2927                                 "for inode %llu\n",
2928                                 (unsigned long long)rec->ino);
2929                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2930                                                     backref->namelen,
2931                                                     backref->dir, &location,
2932                                                     imode_to_type(rec->imode),
2933                                                     backref->index);
2934                         BUG_ON(ret);
2935                         btrfs_commit_transaction(trans, root);
2936                         repaired++;
2937                 }
2938
2939                 if (!delete && (backref->found_inode_ref &&
2940                                 backref->found_dir_index &&
2941                                 backref->found_dir_item &&
2942                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
2943                                 !rec->found_inode_item)) {
2944                         ret = create_inode_item(root, rec, 0);
2945                         if (ret)
2946                                 break;
2947                         repaired++;
2948                 }
2949
2950         }
2951         return ret ? ret : repaired;
2952 }
2953
2954 /*
2955  * To determine the file type for nlink/inode_item repair
2956  *
2957  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
2958  * Return -ENOENT if file type is not found.
2959  */
2960 static int find_file_type(struct inode_record *rec, u8 *type)
2961 {
2962         struct inode_backref *backref;
2963
2964         /* For inode item recovered case */
2965         if (rec->found_inode_item) {
2966                 *type = imode_to_type(rec->imode);
2967                 return 0;
2968         }
2969
2970         list_for_each_entry(backref, &rec->backrefs, list) {
2971                 if (backref->found_dir_index || backref->found_dir_item) {
2972                         *type = backref->filetype;
2973                         return 0;
2974                 }
2975         }
2976         return -ENOENT;
2977 }
2978
2979 /*
2980  * To determine the file name for nlink repair
2981  *
2982  * Return 0 if file name is found, set name and namelen.
2983  * Return -ENOENT if file name is not found.
2984  */
2985 static int find_file_name(struct inode_record *rec,
2986                           char *name, int *namelen)
2987 {
2988         struct inode_backref *backref;
2989
2990         list_for_each_entry(backref, &rec->backrefs, list) {
2991                 if (backref->found_dir_index || backref->found_dir_item ||
2992                     backref->found_inode_ref) {
2993                         memcpy(name, backref->name, backref->namelen);
2994                         *namelen = backref->namelen;
2995                         return 0;
2996                 }
2997         }
2998         return -ENOENT;
2999 }
3000
3001 /* Reset the nlink of the inode to the correct one */
3002 static int reset_nlink(struct btrfs_trans_handle *trans,
3003                        struct btrfs_root *root,
3004                        struct btrfs_path *path,
3005                        struct inode_record *rec)
3006 {
3007         struct inode_backref *backref;
3008         struct inode_backref *tmp;
3009         struct btrfs_key key;
3010         struct btrfs_inode_item *inode_item;
3011         int ret = 0;
3012
3013         /* We don't believe this either, reset it and iterate backref */
3014         rec->found_link = 0;
3015
3016         /* Remove all backref including the valid ones */
3017         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3018                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3019                                    backref->index, backref->name,
3020                                    backref->namelen, 0);
3021                 if (ret < 0)
3022                         goto out;
3023
3024                 /* remove invalid backref, so it won't be added back */
3025                 if (!(backref->found_dir_index &&
3026                       backref->found_dir_item &&
3027                       backref->found_inode_ref)) {
3028                         list_del(&backref->list);
3029                         free(backref);
3030                 } else {
3031                         rec->found_link++;
3032                 }
3033         }
3034
3035         /* Set nlink to 0 */
3036         key.objectid = rec->ino;
3037         key.type = BTRFS_INODE_ITEM_KEY;
3038         key.offset = 0;
3039         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3040         if (ret < 0)
3041                 goto out;
3042         if (ret > 0) {
3043                 ret = -ENOENT;
3044                 goto out;
3045         }
3046         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3047                                     struct btrfs_inode_item);
3048         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3049         btrfs_mark_buffer_dirty(path->nodes[0]);
3050         btrfs_release_path(path);
3051
3052         /*
3053          * Add back valid inode_ref/dir_item/dir_index,
3054          * add_link() will handle the nlink inc, so new nlink must be correct
3055          */
3056         list_for_each_entry(backref, &rec->backrefs, list) {
3057                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3058                                      backref->name, backref->namelen,
3059                                      backref->filetype, &backref->index, 1, 0);
3060                 if (ret < 0)
3061                         goto out;
3062         }
3063 out:
3064         btrfs_release_path(path);
3065         return ret;
3066 }
3067
3068 static int get_highest_inode(struct btrfs_trans_handle *trans,
3069                                 struct btrfs_root *root,
3070                                 struct btrfs_path *path,
3071                                 u64 *highest_ino)
3072 {
3073         struct btrfs_key key, found_key;
3074         int ret;
3075
3076         btrfs_init_path(path);
3077         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3078         key.offset = -1;
3079         key.type = BTRFS_INODE_ITEM_KEY;
3080         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3081         if (ret == 1) {
3082                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3083                                 path->slots[0] - 1);
3084                 *highest_ino = found_key.objectid;
3085                 ret = 0;
3086         }
3087         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3088                 ret = -EOVERFLOW;
3089         btrfs_release_path(path);
3090         return ret;
3091 }
3092
3093 /*
3094  * Link inode to dir 'lost+found'. Increase @ref_count.
3095  *
3096  * Returns 0 means success.
3097  * Returns <0 means failure.
3098  */
3099 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3100                                    struct btrfs_root *root,
3101                                    struct btrfs_path *path,
3102                                    u64 ino, char *namebuf, u32 name_len,
3103                                    u8 filetype, u64 *ref_count)
3104 {
3105         char *dir_name = "lost+found";
3106         u64 lost_found_ino;
3107         int ret;
3108         u32 mode = 0700;
3109
3110         btrfs_release_path(path);
3111         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3112         if (ret < 0)
3113                 goto out;
3114         lost_found_ino++;
3115
3116         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3117                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3118                           mode);
3119         if (ret < 0) {
3120                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3121                 goto out;
3122         }
3123         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3124                              namebuf, name_len, filetype, NULL, 1, 0);
3125         /*
3126          * Add ".INO" suffix several times to handle case where
3127          * "FILENAME.INO" is already taken by another file.
3128          */
3129         while (ret == -EEXIST) {
3130                 /*
3131                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3132                  */
3133                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3134                         ret = -EFBIG;
3135                         goto out;
3136                 }
3137                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3138                          ".%llu", ino);
3139                 name_len += count_digits(ino) + 1;
3140                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3141                                      name_len, filetype, NULL, 1, 0);
3142         }
3143         if (ret < 0) {
3144                 error("failed to link the inode %llu to %s dir: %s",
3145                       ino, dir_name, strerror(-ret));
3146                 goto out;
3147         }
3148
3149         ++*ref_count;
3150         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3151                name_len, namebuf, dir_name);
3152 out:
3153         btrfs_release_path(path);
3154         if (ret)
3155                 error("failed to move file '%.*s' to '%s' dir", name_len,
3156                                 namebuf, dir_name);
3157         return ret;
3158 }
3159
3160 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3161                                struct btrfs_root *root,
3162                                struct btrfs_path *path,
3163                                struct inode_record *rec)
3164 {
3165         char namebuf[BTRFS_NAME_LEN] = {0};
3166         u8 type = 0;
3167         int namelen = 0;
3168         int name_recovered = 0;
3169         int type_recovered = 0;
3170         int ret = 0;
3171
3172         /*
3173          * Get file name and type first before these invalid inode ref
3174          * are deleted by remove_all_invalid_backref()
3175          */
3176         name_recovered = !find_file_name(rec, namebuf, &namelen);
3177         type_recovered = !find_file_type(rec, &type);
3178
3179         if (!name_recovered) {
3180                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3181                        rec->ino, rec->ino);
3182                 namelen = count_digits(rec->ino);
3183                 sprintf(namebuf, "%llu", rec->ino);
3184                 name_recovered = 1;
3185         }
3186         if (!type_recovered) {
3187                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3188                        rec->ino);
3189                 type = BTRFS_FT_REG_FILE;
3190                 type_recovered = 1;
3191         }
3192
3193         ret = reset_nlink(trans, root, path, rec);
3194         if (ret < 0) {
3195                 fprintf(stderr,
3196                         "Failed to reset nlink for inode %llu: %s\n",
3197                         rec->ino, strerror(-ret));
3198                 goto out;
3199         }
3200
3201         if (rec->found_link == 0) {
3202                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3203                                               namebuf, namelen, type,
3204                                               (u64 *)&rec->found_link);
3205                 if (ret)
3206                         goto out;
3207         }
3208         printf("Fixed the nlink of inode %llu\n", rec->ino);
3209 out:
3210         /*
3211          * Clear the flag anyway, or we will loop forever for the same inode
3212          * as it will not be removed from the bad inode list and the dead loop
3213          * happens.
3214          */
3215         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3216         btrfs_release_path(path);
3217         return ret;
3218 }
3219
3220 /*
3221  * Check if there is any normal(reg or prealloc) file extent for given
3222  * ino.
3223  * This is used to determine the file type when neither its dir_index/item or
3224  * inode_item exists.
3225  *
3226  * This will *NOT* report error, if any error happens, just consider it does
3227  * not have any normal file extent.
3228  */
3229 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3230 {
3231         struct btrfs_path path;
3232         struct btrfs_key key;
3233         struct btrfs_key found_key;
3234         struct btrfs_file_extent_item *fi;
3235         u8 type;
3236         int ret = 0;
3237
3238         btrfs_init_path(&path);
3239         key.objectid = ino;
3240         key.type = BTRFS_EXTENT_DATA_KEY;
3241         key.offset = 0;
3242
3243         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3244         if (ret < 0) {
3245                 ret = 0;
3246                 goto out;
3247         }
3248         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3249                 ret = btrfs_next_leaf(root, &path);
3250                 if (ret) {
3251                         ret = 0;
3252                         goto out;
3253                 }
3254         }
3255         while (1) {
3256                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3257                                       path.slots[0]);
3258                 if (found_key.objectid != ino ||
3259                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3260                         break;
3261                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3262                                     struct btrfs_file_extent_item);
3263                 type = btrfs_file_extent_type(path.nodes[0], fi);
3264                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3265                         ret = 1;
3266                         goto out;
3267                 }
3268         }
3269 out:
3270         btrfs_release_path(&path);
3271         return ret;
3272 }
3273
3274 static u32 btrfs_type_to_imode(u8 type)
3275 {
3276         static u32 imode_by_btrfs_type[] = {
3277                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3278                 [BTRFS_FT_DIR]          = S_IFDIR,
3279                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3280                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3281                 [BTRFS_FT_FIFO]         = S_IFIFO,
3282                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3283                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3284         };
3285
3286         return imode_by_btrfs_type[(type)];
3287 }
3288
3289 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3290                                 struct btrfs_root *root,
3291                                 struct btrfs_path *path,
3292                                 struct inode_record *rec)
3293 {
3294         u8 filetype;
3295         u32 mode = 0700;
3296         int type_recovered = 0;
3297         int ret = 0;
3298
3299         printf("Trying to rebuild inode:%llu\n", rec->ino);
3300
3301         type_recovered = !find_file_type(rec, &filetype);
3302
3303         /*
3304          * Try to determine inode type if type not found.
3305          *
3306          * For found regular file extent, it must be FILE.
3307          * For found dir_item/index, it must be DIR.
3308          *
3309          * For undetermined one, use FILE as fallback.
3310          *
3311          * TODO:
3312          * 1. If found backref(inode_index/item is already handled) to it,
3313          *    it must be DIR.
3314          *    Need new inode-inode ref structure to allow search for that.
3315          */
3316         if (!type_recovered) {
3317                 if (rec->found_file_extent &&
3318                     find_normal_file_extent(root, rec->ino)) {
3319                         type_recovered = 1;
3320                         filetype = BTRFS_FT_REG_FILE;
3321                 } else if (rec->found_dir_item) {
3322                         type_recovered = 1;
3323                         filetype = BTRFS_FT_DIR;
3324                 } else if (!list_empty(&rec->orphan_extents)) {
3325                         type_recovered = 1;
3326                         filetype = BTRFS_FT_REG_FILE;
3327                 } else{
3328                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3329                                rec->ino);
3330                         type_recovered = 1;
3331                         filetype = BTRFS_FT_REG_FILE;
3332                 }
3333         }
3334
3335         ret = btrfs_new_inode(trans, root, rec->ino,
3336                               mode | btrfs_type_to_imode(filetype));
3337         if (ret < 0)
3338                 goto out;
3339
3340         /*
3341          * Here inode rebuild is done, we only rebuild the inode item,
3342          * don't repair the nlink(like move to lost+found).
3343          * That is the job of nlink repair.
3344          *
3345          * We just fill the record and return
3346          */
3347         rec->found_dir_item = 1;
3348         rec->imode = mode | btrfs_type_to_imode(filetype);
3349         rec->nlink = 0;
3350         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3351         /* Ensure the inode_nlinks repair function will be called */
3352         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3353 out:
3354         return ret;
3355 }
3356
3357 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3358                                       struct btrfs_root *root,
3359                                       struct btrfs_path *path,
3360                                       struct inode_record *rec)
3361 {
3362         struct orphan_data_extent *orphan;
3363         struct orphan_data_extent *tmp;
3364         int ret = 0;
3365
3366         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3367                 /*
3368                  * Check for conflicting file extents
3369                  *
3370                  * Here we don't know whether the extents is compressed or not,
3371                  * so we can only assume it not compressed nor data offset,
3372                  * and use its disk_len as extent length.
3373                  */
3374                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3375                                        orphan->offset, orphan->disk_len, 0);
3376                 btrfs_release_path(path);
3377                 if (ret < 0)
3378                         goto out;
3379                 if (!ret) {
3380                         fprintf(stderr,
3381                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3382                                 orphan->disk_bytenr, orphan->disk_len);
3383                         ret = btrfs_free_extent(trans,
3384                                         root->fs_info->extent_root,
3385                                         orphan->disk_bytenr, orphan->disk_len,
3386                                         0, root->objectid, orphan->objectid,
3387                                         orphan->offset);
3388                         if (ret < 0)
3389                                 goto out;
3390                 }
3391                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3392                                 orphan->offset, orphan->disk_bytenr,
3393                                 orphan->disk_len, orphan->disk_len);
3394                 if (ret < 0)
3395                         goto out;
3396
3397                 /* Update file size info */
3398                 rec->found_size += orphan->disk_len;
3399                 if (rec->found_size == rec->nbytes)
3400                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3401
3402                 /* Update the file extent hole info too */
3403                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3404                                            orphan->disk_len);
3405                 if (ret < 0)
3406                         goto out;
3407                 if (RB_EMPTY_ROOT(&rec->holes))
3408                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3409
3410                 list_del(&orphan->list);
3411                 free(orphan);
3412         }
3413         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3414 out:
3415         return ret;
3416 }
3417
3418 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3419                                         struct btrfs_root *root,
3420                                         struct btrfs_path *path,
3421                                         struct inode_record *rec)
3422 {
3423         struct rb_node *node;
3424         struct file_extent_hole *hole;
3425         int found = 0;
3426         int ret = 0;
3427
3428         node = rb_first(&rec->holes);
3429
3430         while (node) {
3431                 found = 1;
3432                 hole = rb_entry(node, struct file_extent_hole, node);
3433                 ret = btrfs_punch_hole(trans, root, rec->ino,
3434                                        hole->start, hole->len);
3435                 if (ret < 0)
3436                         goto out;
3437                 ret = del_file_extent_hole(&rec->holes, hole->start,
3438                                            hole->len);
3439                 if (ret < 0)
3440                         goto out;
3441                 if (RB_EMPTY_ROOT(&rec->holes))
3442                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3443                 node = rb_first(&rec->holes);
3444         }
3445         /* special case for a file losing all its file extent */
3446         if (!found) {
3447                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3448                                        round_up(rec->isize,
3449                                                 root->fs_info->sectorsize));
3450                 if (ret < 0)
3451                         goto out;
3452         }
3453         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3454                rec->ino, root->objectid);
3455 out:
3456         return ret;
3457 }
3458
3459 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3460 {
3461         struct btrfs_trans_handle *trans;
3462         struct btrfs_path path;
3463         int ret = 0;
3464
3465         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3466                              I_ERR_NO_ORPHAN_ITEM |
3467                              I_ERR_LINK_COUNT_WRONG |
3468                              I_ERR_NO_INODE_ITEM |
3469                              I_ERR_FILE_EXTENT_ORPHAN |
3470                              I_ERR_FILE_EXTENT_DISCOUNT|
3471                              I_ERR_FILE_NBYTES_WRONG)))
3472                 return rec->errors;
3473
3474         /*
3475          * For nlink repair, it may create a dir and add link, so
3476          * 2 for parent(256)'s dir_index and dir_item
3477          * 2 for lost+found dir's inode_item and inode_ref
3478          * 1 for the new inode_ref of the file
3479          * 2 for lost+found dir's dir_index and dir_item for the file
3480          */
3481         trans = btrfs_start_transaction(root, 7);
3482         if (IS_ERR(trans))
3483                 return PTR_ERR(trans);
3484
3485         btrfs_init_path(&path);
3486         if (rec->errors & I_ERR_NO_INODE_ITEM)
3487                 ret = repair_inode_no_item(trans, root, &path, rec);
3488         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3489                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3490         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3491                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3492         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3493                 ret = repair_inode_isize(trans, root, &path, rec);
3494         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3495                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3496         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3497                 ret = repair_inode_nlinks(trans, root, &path, rec);
3498         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3499                 ret = repair_inode_nbytes(trans, root, &path, rec);
3500         btrfs_commit_transaction(trans, root);
3501         btrfs_release_path(&path);
3502         return ret;
3503 }
3504
3505 static int check_inode_recs(struct btrfs_root *root,
3506                             struct cache_tree *inode_cache)
3507 {
3508         struct cache_extent *cache;
3509         struct ptr_node *node;
3510         struct inode_record *rec;
3511         struct inode_backref *backref;
3512         int stage = 0;
3513         int ret = 0;
3514         int err = 0;
3515         u64 error = 0;
3516         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3517
3518         if (btrfs_root_refs(&root->root_item) == 0) {
3519                 if (!cache_tree_empty(inode_cache))
3520                         fprintf(stderr, "warning line %d\n", __LINE__);
3521                 return 0;
3522         }
3523
3524         /*
3525          * We need to repair backrefs first because we could change some of the
3526          * errors in the inode recs.
3527          *
3528          * We also need to go through and delete invalid backrefs first and then
3529          * add the correct ones second.  We do this because we may get EEXIST
3530          * when adding back the correct index because we hadn't yet deleted the
3531          * invalid index.
3532          *
3533          * For example, if we were missing a dir index then the directories
3534          * isize would be wrong, so if we fixed the isize to what we thought it
3535          * would be and then fixed the backref we'd still have a invalid fs, so
3536          * we need to add back the dir index and then check to see if the isize
3537          * is still wrong.
3538          */
3539         while (stage < 3) {
3540                 stage++;
3541                 if (stage == 3 && !err)
3542                         break;
3543
3544                 cache = search_cache_extent(inode_cache, 0);
3545                 while (repair && cache) {
3546                         node = container_of(cache, struct ptr_node, cache);
3547                         rec = node->data;
3548                         cache = next_cache_extent(cache);
3549
3550                         /* Need to free everything up and rescan */
3551                         if (stage == 3) {
3552                                 remove_cache_extent(inode_cache, &node->cache);
3553                                 free(node);
3554                                 free_inode_rec(rec);
3555                                 continue;
3556                         }
3557
3558                         if (list_empty(&rec->backrefs))
3559                                 continue;
3560
3561                         ret = repair_inode_backrefs(root, rec, inode_cache,
3562                                                     stage == 1);
3563                         if (ret < 0) {
3564                                 err = ret;
3565                                 stage = 2;
3566                                 break;
3567                         } if (ret > 0) {
3568                                 err = -EAGAIN;
3569                         }
3570                 }
3571         }
3572         if (err)
3573                 return err;
3574
3575         rec = get_inode_rec(inode_cache, root_dirid, 0);
3576         BUG_ON(IS_ERR(rec));
3577         if (rec) {
3578                 ret = check_root_dir(rec);
3579                 if (ret) {
3580                         fprintf(stderr, "root %llu root dir %llu error\n",
3581                                 (unsigned long long)root->root_key.objectid,
3582                                 (unsigned long long)root_dirid);
3583                         print_inode_error(root, rec);
3584                         error++;
3585                 }
3586         } else {
3587                 if (repair) {
3588                         struct btrfs_trans_handle *trans;
3589
3590                         trans = btrfs_start_transaction(root, 1);
3591                         if (IS_ERR(trans)) {
3592                                 err = PTR_ERR(trans);
3593                                 return err;
3594                         }
3595
3596                         fprintf(stderr,
3597                                 "root %llu missing its root dir, recreating\n",
3598                                 (unsigned long long)root->objectid);
3599
3600                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3601                         BUG_ON(ret);
3602
3603                         btrfs_commit_transaction(trans, root);
3604                         return -EAGAIN;
3605                 }
3606
3607                 fprintf(stderr, "root %llu root dir %llu not found\n",
3608                         (unsigned long long)root->root_key.objectid,
3609                         (unsigned long long)root_dirid);
3610         }
3611
3612         while (1) {
3613                 cache = search_cache_extent(inode_cache, 0);
3614                 if (!cache)
3615                         break;
3616                 node = container_of(cache, struct ptr_node, cache);
3617                 rec = node->data;
3618                 remove_cache_extent(inode_cache, &node->cache);
3619                 free(node);
3620                 if (rec->ino == root_dirid ||
3621                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3622                         free_inode_rec(rec);
3623                         continue;
3624                 }
3625
3626                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3627                         ret = check_orphan_item(root, rec->ino);
3628                         if (ret == 0)
3629                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3630                         if (can_free_inode_rec(rec)) {
3631                                 free_inode_rec(rec);
3632                                 continue;
3633                         }
3634                 }
3635
3636                 if (!rec->found_inode_item)
3637                         rec->errors |= I_ERR_NO_INODE_ITEM;
3638                 if (rec->found_link != rec->nlink)
3639                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3640                 if (repair) {
3641                         ret = try_repair_inode(root, rec);
3642                         if (ret == 0 && can_free_inode_rec(rec)) {
3643                                 free_inode_rec(rec);
3644                                 continue;
3645                         }
3646                         ret = 0;
3647                 }
3648
3649                 if (!(repair && ret == 0))
3650                         error++;
3651                 print_inode_error(root, rec);
3652                 list_for_each_entry(backref, &rec->backrefs, list) {
3653                         if (!backref->found_dir_item)
3654                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3655                         if (!backref->found_dir_index)
3656                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3657                         if (!backref->found_inode_ref)
3658                                 backref->errors |= REF_ERR_NO_INODE_REF;
3659                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3660                                 " namelen %u name %s filetype %d errors %x",
3661                                 (unsigned long long)backref->dir,
3662                                 (unsigned long long)backref->index,
3663                                 backref->namelen, backref->name,
3664                                 backref->filetype, backref->errors);
3665                         print_ref_error(backref->errors);
3666                 }
3667                 free_inode_rec(rec);
3668         }
3669         return (error > 0) ? -1 : 0;
3670 }
3671
3672 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3673                                         u64 objectid)
3674 {
3675         struct cache_extent *cache;
3676         struct root_record *rec = NULL;
3677         int ret;
3678
3679         cache = lookup_cache_extent(root_cache, objectid, 1);
3680         if (cache) {
3681                 rec = container_of(cache, struct root_record, cache);
3682         } else {
3683                 rec = calloc(1, sizeof(*rec));
3684                 if (!rec)
3685                         return ERR_PTR(-ENOMEM);
3686                 rec->objectid = objectid;
3687                 INIT_LIST_HEAD(&rec->backrefs);
3688                 rec->cache.start = objectid;
3689                 rec->cache.size = 1;
3690
3691                 ret = insert_cache_extent(root_cache, &rec->cache);
3692                 if (ret)
3693                         return ERR_PTR(-EEXIST);
3694         }
3695         return rec;
3696 }
3697
3698 static struct root_backref *get_root_backref(struct root_record *rec,
3699                                              u64 ref_root, u64 dir, u64 index,
3700                                              const char *name, int namelen)
3701 {
3702         struct root_backref *backref;
3703
3704         list_for_each_entry(backref, &rec->backrefs, list) {
3705                 if (backref->ref_root != ref_root || backref->dir != dir ||
3706                     backref->namelen != namelen)
3707                         continue;
3708                 if (memcmp(name, backref->name, namelen))
3709                         continue;
3710                 return backref;
3711         }
3712
3713         backref = calloc(1, sizeof(*backref) + namelen + 1);
3714         if (!backref)
3715                 return NULL;
3716         backref->ref_root = ref_root;
3717         backref->dir = dir;
3718         backref->index = index;
3719         backref->namelen = namelen;
3720         memcpy(backref->name, name, namelen);
3721         backref->name[namelen] = '\0';
3722         list_add_tail(&backref->list, &rec->backrefs);
3723         return backref;
3724 }
3725
3726 static void free_root_record(struct cache_extent *cache)
3727 {
3728         struct root_record *rec;
3729         struct root_backref *backref;
3730
3731         rec = container_of(cache, struct root_record, cache);
3732         while (!list_empty(&rec->backrefs)) {
3733                 backref = to_root_backref(rec->backrefs.next);
3734                 list_del(&backref->list);
3735                 free(backref);
3736         }
3737
3738         free(rec);
3739 }
3740
3741 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3742
3743 static int add_root_backref(struct cache_tree *root_cache,
3744                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3745                             const char *name, int namelen,
3746                             int item_type, int errors)
3747 {
3748         struct root_record *rec;
3749         struct root_backref *backref;
3750
3751         rec = get_root_rec(root_cache, root_id);
3752         BUG_ON(IS_ERR(rec));
3753         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3754         BUG_ON(!backref);
3755
3756         backref->errors |= errors;
3757
3758         if (item_type != BTRFS_DIR_ITEM_KEY) {
3759                 if (backref->found_dir_index || backref->found_back_ref ||
3760                     backref->found_forward_ref) {
3761                         if (backref->index != index)
3762                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3763                 } else {
3764                         backref->index = index;
3765                 }
3766         }
3767
3768         if (item_type == BTRFS_DIR_ITEM_KEY) {
3769                 if (backref->found_forward_ref)
3770                         rec->found_ref++;
3771                 backref->found_dir_item = 1;
3772         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3773                 backref->found_dir_index = 1;
3774         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3775                 if (backref->found_forward_ref)
3776                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3777                 else if (backref->found_dir_item)
3778                         rec->found_ref++;
3779                 backref->found_forward_ref = 1;
3780         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3781                 if (backref->found_back_ref)
3782                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3783                 backref->found_back_ref = 1;
3784         } else {
3785                 BUG_ON(1);
3786         }
3787
3788         if (backref->found_forward_ref && backref->found_dir_item)
3789                 backref->reachable = 1;
3790         return 0;
3791 }
3792
3793 static int merge_root_recs(struct btrfs_root *root,
3794                            struct cache_tree *src_cache,
3795                            struct cache_tree *dst_cache)
3796 {
3797         struct cache_extent *cache;
3798         struct ptr_node *node;
3799         struct inode_record *rec;
3800         struct inode_backref *backref;
3801         int ret = 0;
3802
3803         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3804                 free_inode_recs_tree(src_cache);
3805                 return 0;
3806         }
3807
3808         while (1) {
3809                 cache = search_cache_extent(src_cache, 0);
3810                 if (!cache)
3811                         break;
3812                 node = container_of(cache, struct ptr_node, cache);
3813                 rec = node->data;
3814                 remove_cache_extent(src_cache, &node->cache);
3815                 free(node);
3816
3817                 ret = is_child_root(root, root->objectid, rec->ino);
3818                 if (ret < 0)
3819                         break;
3820                 else if (ret == 0)
3821                         goto skip;
3822
3823                 list_for_each_entry(backref, &rec->backrefs, list) {
3824                         BUG_ON(backref->found_inode_ref);
3825                         if (backref->found_dir_item)
3826                                 add_root_backref(dst_cache, rec->ino,
3827                                         root->root_key.objectid, backref->dir,
3828                                         backref->index, backref->name,
3829                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3830                                         backref->errors);
3831                         if (backref->found_dir_index)
3832                                 add_root_backref(dst_cache, rec->ino,
3833                                         root->root_key.objectid, backref->dir,
3834                                         backref->index, backref->name,
3835                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3836                                         backref->errors);
3837                 }
3838 skip:
3839                 free_inode_rec(rec);
3840         }
3841         if (ret < 0)
3842                 return ret;
3843         return 0;
3844 }
3845
3846 static int check_root_refs(struct btrfs_root *root,
3847                            struct cache_tree *root_cache)
3848 {
3849         struct root_record *rec;
3850         struct root_record *ref_root;
3851         struct root_backref *backref;
3852         struct cache_extent *cache;
3853         int loop = 1;
3854         int ret;
3855         int error;
3856         int errors = 0;
3857
3858         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3859         BUG_ON(IS_ERR(rec));
3860         rec->found_ref = 1;
3861
3862         /* fixme: this can not detect circular references */
3863         while (loop) {
3864                 loop = 0;
3865                 cache = search_cache_extent(root_cache, 0);
3866                 while (1) {
3867                         if (!cache)
3868                                 break;
3869                         rec = container_of(cache, struct root_record, cache);
3870                         cache = next_cache_extent(cache);
3871
3872                         if (rec->found_ref == 0)
3873                                 continue;
3874
3875                         list_for_each_entry(backref, &rec->backrefs, list) {
3876                                 if (!backref->reachable)
3877                                         continue;
3878
3879                                 ref_root = get_root_rec(root_cache,
3880                                                         backref->ref_root);
3881                                 BUG_ON(IS_ERR(ref_root));
3882                                 if (ref_root->found_ref > 0)
3883                                         continue;
3884
3885                                 backref->reachable = 0;
3886                                 rec->found_ref--;
3887                                 if (rec->found_ref == 0)
3888                                         loop = 1;
3889                         }
3890                 }
3891         }
3892
3893         cache = search_cache_extent(root_cache, 0);
3894         while (1) {
3895                 if (!cache)
3896                         break;
3897                 rec = container_of(cache, struct root_record, cache);
3898                 cache = next_cache_extent(cache);
3899
3900                 if (rec->found_ref == 0 &&
3901                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3902                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3903                         ret = check_orphan_item(root->fs_info->tree_root,
3904                                                 rec->objectid);
3905                         if (ret == 0)
3906                                 continue;
3907
3908                         /*
3909                          * If we don't have a root item then we likely just have
3910                          * a dir item in a snapshot for this root but no actual
3911                          * ref key or anything so it's meaningless.
3912                          */
3913                         if (!rec->found_root_item)
3914                                 continue;
3915                         errors++;
3916                         fprintf(stderr, "fs tree %llu not referenced\n",
3917                                 (unsigned long long)rec->objectid);
3918                 }
3919
3920                 error = 0;
3921                 if (rec->found_ref > 0 && !rec->found_root_item)
3922                         error = 1;
3923                 list_for_each_entry(backref, &rec->backrefs, list) {
3924                         if (!backref->found_dir_item)
3925                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3926                         if (!backref->found_dir_index)
3927                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3928                         if (!backref->found_back_ref)
3929                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3930                         if (!backref->found_forward_ref)
3931                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3932                         if (backref->reachable && backref->errors)
3933                                 error = 1;
3934                 }
3935                 if (!error)
3936                         continue;
3937
3938                 errors++;
3939                 fprintf(stderr, "fs tree %llu refs %u %s\n",
3940                         (unsigned long long)rec->objectid, rec->found_ref,
3941                          rec->found_root_item ? "" : "not found");
3942
3943                 list_for_each_entry(backref, &rec->backrefs, list) {
3944                         if (!backref->reachable)
3945                                 continue;
3946                         if (!backref->errors && rec->found_root_item)
3947                                 continue;
3948                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
3949                                 " index %llu namelen %u name %s errors %x\n",
3950                                 (unsigned long long)backref->ref_root,
3951                                 (unsigned long long)backref->dir,
3952                                 (unsigned long long)backref->index,
3953                                 backref->namelen, backref->name,
3954                                 backref->errors);
3955                         print_ref_error(backref->errors);
3956                 }
3957         }
3958         return errors > 0 ? 1 : 0;
3959 }
3960
3961 static int process_root_ref(struct extent_buffer *eb, int slot,
3962                             struct btrfs_key *key,
3963                             struct cache_tree *root_cache)
3964 {
3965         u64 dirid;
3966         u64 index;
3967         u32 len;
3968         u32 name_len;
3969         struct btrfs_root_ref *ref;
3970         char namebuf[BTRFS_NAME_LEN];
3971         int error;
3972
3973         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
3974
3975         dirid = btrfs_root_ref_dirid(eb, ref);
3976         index = btrfs_root_ref_sequence(eb, ref);
3977         name_len = btrfs_root_ref_name_len(eb, ref);
3978
3979         if (name_len <= BTRFS_NAME_LEN) {
3980                 len = name_len;
3981                 error = 0;
3982         } else {
3983                 len = BTRFS_NAME_LEN;
3984                 error = REF_ERR_NAME_TOO_LONG;
3985         }
3986         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
3987
3988         if (key->type == BTRFS_ROOT_REF_KEY) {
3989                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
3990                                  index, namebuf, len, key->type, error);
3991         } else {
3992                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
3993                                  index, namebuf, len, key->type, error);
3994         }
3995         return 0;
3996 }
3997
3998 static void free_corrupt_block(struct cache_extent *cache)
3999 {
4000         struct btrfs_corrupt_block *corrupt;
4001
4002         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4003         free(corrupt);
4004 }
4005
4006 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4007
4008 /*
4009  * Repair the btree of the given root.
4010  *
4011  * The fix is to remove the node key in corrupt_blocks cache_tree.
4012  * and rebalance the tree.
4013  * After the fix, the btree should be writeable.
4014  */
4015 static int repair_btree(struct btrfs_root *root,
4016                         struct cache_tree *corrupt_blocks)
4017 {
4018         struct btrfs_trans_handle *trans;
4019         struct btrfs_path path;
4020         struct btrfs_corrupt_block *corrupt;
4021         struct cache_extent *cache;
4022         struct btrfs_key key;
4023         u64 offset;
4024         int level;
4025         int ret = 0;
4026
4027         if (cache_tree_empty(corrupt_blocks))
4028                 return 0;
4029
4030         trans = btrfs_start_transaction(root, 1);
4031         if (IS_ERR(trans)) {
4032                 ret = PTR_ERR(trans);
4033                 fprintf(stderr, "Error starting transaction: %s\n",
4034                         strerror(-ret));
4035                 return ret;
4036         }
4037         btrfs_init_path(&path);
4038         cache = first_cache_extent(corrupt_blocks);
4039         while (cache) {
4040                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4041                                        cache);
4042                 level = corrupt->level;
4043                 path.lowest_level = level;
4044                 key.objectid = corrupt->key.objectid;
4045                 key.type = corrupt->key.type;
4046                 key.offset = corrupt->key.offset;
4047
4048                 /*
4049                  * Here we don't want to do any tree balance, since it may
4050                  * cause a balance with corrupted brother leaf/node,
4051                  * so ins_len set to 0 here.
4052                  * Balance will be done after all corrupt node/leaf is deleted.
4053                  */
4054                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4055                 if (ret < 0)
4056                         goto out;
4057                 offset = btrfs_node_blockptr(path.nodes[level],
4058                                              path.slots[level]);
4059
4060                 /* Remove the ptr */
4061                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4062                 if (ret < 0)
4063                         goto out;
4064                 /*
4065                  * Remove the corresponding extent
4066                  * return value is not concerned.
4067                  */
4068                 btrfs_release_path(&path);
4069                 ret = btrfs_free_extent(trans, root, offset,
4070                                 root->fs_info->nodesize, 0,
4071                                 root->root_key.objectid, level - 1, 0);
4072                 cache = next_cache_extent(cache);
4073         }
4074
4075         /* Balance the btree using btrfs_search_slot() */
4076         cache = first_cache_extent(corrupt_blocks);
4077         while (cache) {
4078                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4079                                        cache);
4080                 memcpy(&key, &corrupt->key, sizeof(key));
4081                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4082                 if (ret < 0)
4083                         goto out;
4084                 /* return will always >0 since it won't find the item */
4085                 ret = 0;
4086                 btrfs_release_path(&path);
4087                 cache = next_cache_extent(cache);
4088         }
4089 out:
4090         btrfs_commit_transaction(trans, root);
4091         btrfs_release_path(&path);
4092         return ret;
4093 }
4094
4095 static int check_fs_root(struct btrfs_root *root,
4096                          struct cache_tree *root_cache,
4097                          struct walk_control *wc)
4098 {
4099         int ret = 0;
4100         int err = 0;
4101         int wret;
4102         int level;
4103         struct btrfs_path path;
4104         struct shared_node root_node;
4105         struct root_record *rec;
4106         struct btrfs_root_item *root_item = &root->root_item;
4107         struct cache_tree corrupt_blocks;
4108         struct orphan_data_extent *orphan;
4109         struct orphan_data_extent *tmp;
4110         enum btrfs_tree_block_status status;
4111         struct node_refs nrefs;
4112
4113         /*
4114          * Reuse the corrupt_block cache tree to record corrupted tree block
4115          *
4116          * Unlike the usage in extent tree check, here we do it in a per
4117          * fs/subvol tree base.
4118          */
4119         cache_tree_init(&corrupt_blocks);
4120         root->fs_info->corrupt_blocks = &corrupt_blocks;
4121
4122         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4123                 rec = get_root_rec(root_cache, root->root_key.objectid);
4124                 BUG_ON(IS_ERR(rec));
4125                 if (btrfs_root_refs(root_item) > 0)
4126                         rec->found_root_item = 1;
4127         }
4128
4129         btrfs_init_path(&path);
4130         memset(&root_node, 0, sizeof(root_node));
4131         cache_tree_init(&root_node.root_cache);
4132         cache_tree_init(&root_node.inode_cache);
4133         memset(&nrefs, 0, sizeof(nrefs));
4134
4135         /* Move the orphan extent record to corresponding inode_record */
4136         list_for_each_entry_safe(orphan, tmp,
4137                                  &root->orphan_data_extents, list) {
4138                 struct inode_record *inode;
4139
4140                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4141                                       1);
4142                 BUG_ON(IS_ERR(inode));
4143                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4144                 list_move(&orphan->list, &inode->orphan_extents);
4145         }
4146
4147         level = btrfs_header_level(root->node);
4148         memset(wc->nodes, 0, sizeof(wc->nodes));
4149         wc->nodes[level] = &root_node;
4150         wc->active_node = level;
4151         wc->root_level = level;
4152
4153         /* We may not have checked the root block, lets do that now */
4154         if (btrfs_is_leaf(root->node))
4155                 status = btrfs_check_leaf(root, NULL, root->node);
4156         else
4157                 status = btrfs_check_node(root, NULL, root->node);
4158         if (status != BTRFS_TREE_BLOCK_CLEAN)
4159                 return -EIO;
4160
4161         if (btrfs_root_refs(root_item) > 0 ||
4162             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4163                 path.nodes[level] = root->node;
4164                 extent_buffer_get(root->node);
4165                 path.slots[level] = 0;
4166         } else {
4167                 struct btrfs_key key;
4168                 struct btrfs_disk_key found_key;
4169
4170                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4171                 level = root_item->drop_level;
4172                 path.lowest_level = level;
4173                 if (level > btrfs_header_level(root->node) ||
4174                     level >= BTRFS_MAX_LEVEL) {
4175                         error("ignoring invalid drop level: %u", level);
4176                         goto skip_walking;
4177                 }
4178                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4179                 if (wret < 0)
4180                         goto skip_walking;
4181                 btrfs_node_key(path.nodes[level], &found_key,
4182                                 path.slots[level]);
4183                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4184                                         sizeof(found_key)));
4185         }
4186
4187         while (1) {
4188                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4189                 if (wret < 0)
4190                         ret = wret;
4191                 if (wret != 0)
4192                         break;
4193
4194                 wret = walk_up_tree(root, &path, wc, &level);
4195                 if (wret < 0)
4196                         ret = wret;
4197                 if (wret != 0)
4198                         break;
4199         }
4200 skip_walking:
4201         btrfs_release_path(&path);
4202
4203         if (!cache_tree_empty(&corrupt_blocks)) {
4204                 struct cache_extent *cache;
4205                 struct btrfs_corrupt_block *corrupt;
4206
4207                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4208                        root->root_key.objectid);
4209                 cache = first_cache_extent(&corrupt_blocks);
4210                 while (cache) {
4211                         corrupt = container_of(cache,
4212                                                struct btrfs_corrupt_block,
4213                                                cache);
4214                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4215                                cache->start, corrupt->level,
4216                                corrupt->key.objectid, corrupt->key.type,
4217                                corrupt->key.offset);
4218                         cache = next_cache_extent(cache);
4219                 }
4220                 if (repair) {
4221                         printf("Try to repair the btree for root %llu\n",
4222                                root->root_key.objectid);
4223                         ret = repair_btree(root, &corrupt_blocks);
4224                         if (ret < 0)
4225                                 fprintf(stderr, "Failed to repair btree: %s\n",
4226                                         strerror(-ret));
4227                         if (!ret)
4228                                 printf("Btree for root %llu is fixed\n",
4229                                        root->root_key.objectid);
4230                 }
4231         }
4232
4233         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4234         if (err < 0)
4235                 ret = err;
4236
4237         if (root_node.current) {
4238                 root_node.current->checked = 1;
4239                 maybe_free_inode_rec(&root_node.inode_cache,
4240                                 root_node.current);
4241         }
4242
4243         err = check_inode_recs(root, &root_node.inode_cache);
4244         if (!ret)
4245                 ret = err;
4246
4247         free_corrupt_blocks_tree(&corrupt_blocks);
4248         root->fs_info->corrupt_blocks = NULL;
4249         free_orphan_data_extents(&root->orphan_data_extents);
4250         return ret;
4251 }
4252
4253 static int fs_root_objectid(u64 objectid)
4254 {
4255         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4256             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4257                 return 1;
4258         return is_fstree(objectid);
4259 }
4260
4261 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4262                           struct cache_tree *root_cache)
4263 {
4264         struct btrfs_path path;
4265         struct btrfs_key key;
4266         struct walk_control wc;
4267         struct extent_buffer *leaf, *tree_node;
4268         struct btrfs_root *tmp_root;
4269         struct btrfs_root *tree_root = fs_info->tree_root;
4270         int ret;
4271         int err = 0;
4272
4273         if (ctx.progress_enabled) {
4274                 ctx.tp = TASK_FS_ROOTS;
4275                 task_start(ctx.info);
4276         }
4277
4278         /*
4279          * Just in case we made any changes to the extent tree that weren't
4280          * reflected into the free space cache yet.
4281          */
4282         if (repair)
4283                 reset_cached_block_groups(fs_info);
4284         memset(&wc, 0, sizeof(wc));
4285         cache_tree_init(&wc.shared);
4286         btrfs_init_path(&path);
4287
4288 again:
4289         key.offset = 0;
4290         key.objectid = 0;
4291         key.type = BTRFS_ROOT_ITEM_KEY;
4292         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4293         if (ret < 0) {
4294                 err = 1;
4295                 goto out;
4296         }
4297         tree_node = tree_root->node;
4298         while (1) {
4299                 if (tree_node != tree_root->node) {
4300                         free_root_recs_tree(root_cache);
4301                         btrfs_release_path(&path);
4302                         goto again;
4303                 }
4304                 leaf = path.nodes[0];
4305                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4306                         ret = btrfs_next_leaf(tree_root, &path);
4307                         if (ret) {
4308                                 if (ret < 0)
4309                                         err = 1;
4310                                 break;
4311                         }
4312                         leaf = path.nodes[0];
4313                 }
4314                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4315                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4316                     fs_root_objectid(key.objectid)) {
4317                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4318                                 tmp_root = btrfs_read_fs_root_no_cache(
4319                                                 fs_info, &key);
4320                         } else {
4321                                 key.offset = (u64)-1;
4322                                 tmp_root = btrfs_read_fs_root(
4323                                                 fs_info, &key);
4324                         }
4325                         if (IS_ERR(tmp_root)) {
4326                                 err = 1;
4327                                 goto next;
4328                         }
4329                         ret = check_fs_root(tmp_root, root_cache, &wc);
4330                         if (ret == -EAGAIN) {
4331                                 free_root_recs_tree(root_cache);
4332                                 btrfs_release_path(&path);
4333                                 goto again;
4334                         }
4335                         if (ret)
4336                                 err = 1;
4337                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4338                                 btrfs_free_fs_root(tmp_root);
4339                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4340                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4341                         process_root_ref(leaf, path.slots[0], &key,
4342                                          root_cache);
4343                 }
4344 next:
4345                 path.slots[0]++;
4346         }
4347 out:
4348         btrfs_release_path(&path);
4349         if (err)
4350                 free_extent_cache_tree(&wc.shared);
4351         if (!cache_tree_empty(&wc.shared))
4352                 fprintf(stderr, "warning line %d\n", __LINE__);
4353
4354         task_stop(ctx.info);
4355
4356         return err;
4357 }
4358
4359 /*
4360  * Find the @index according by @ino and name.
4361  * Notice:time efficiency is O(N)
4362  *
4363  * @root:       the root of the fs/file tree
4364  * @index_ret:  the index as return value
4365  * @namebuf:    the name to match
4366  * @name_len:   the length of name to match
4367  * @file_type:  the file_type of INODE_ITEM to match
4368  *
4369  * Returns 0 if found and *@index_ret will be modified with right value
4370  * Returns< 0 not found and *@index_ret will be (u64)-1
4371  */
4372 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4373                           u64 *index_ret, char *namebuf, u32 name_len,
4374                           u8 file_type)
4375 {
4376         struct btrfs_path path;
4377         struct extent_buffer *node;
4378         struct btrfs_dir_item *di;
4379         struct btrfs_key key;
4380         struct btrfs_key location;
4381         char name[BTRFS_NAME_LEN] = {0};
4382
4383         u32 total;
4384         u32 cur = 0;
4385         u32 len;
4386         u32 data_len;
4387         u8 filetype;
4388         int slot;
4389         int ret;
4390
4391         ASSERT(index_ret);
4392
4393         /* search from the last index */
4394         key.objectid = dirid;
4395         key.offset = (u64)-1;
4396         key.type = BTRFS_DIR_INDEX_KEY;
4397
4398         btrfs_init_path(&path);
4399         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4400         if (ret < 0)
4401                 return ret;
4402
4403 loop:
4404         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4405         if (ret) {
4406                 ret = -ENOENT;
4407                 *index_ret = (64)-1;
4408                 goto out;
4409         }
4410         /* Check whether inode_id/filetype/name match */
4411         node = path.nodes[0];
4412         slot = path.slots[0];
4413         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4414         total = btrfs_item_size_nr(node, slot);
4415         while (cur < total) {
4416                 ret = -ENOENT;
4417                 len = btrfs_dir_name_len(node, di);
4418                 data_len = btrfs_dir_data_len(node, di);
4419
4420                 btrfs_dir_item_key_to_cpu(node, di, &location);
4421                 if (location.objectid != location_id ||
4422                     location.type != BTRFS_INODE_ITEM_KEY ||
4423                     location.offset != 0)
4424                         goto next;
4425
4426                 filetype = btrfs_dir_type(node, di);
4427                 if (file_type != filetype)
4428                         goto next;
4429
4430                 if (len > BTRFS_NAME_LEN)
4431                         len = BTRFS_NAME_LEN;
4432
4433                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4434                 if (len != name_len || strncmp(namebuf, name, len))
4435                         goto next;
4436
4437                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4438                 *index_ret = key.offset;
4439                 ret = 0;
4440                 goto out;
4441 next:
4442                 len += sizeof(*di) + data_len;
4443                 di = (struct btrfs_dir_item *)((char *)di + len);
4444                 cur += len;
4445         }
4446         goto loop;
4447
4448 out:
4449         btrfs_release_path(&path);
4450         return ret;
4451 }
4452
4453 /*
4454  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4455  * INODE_REF/INODE_EXTREF match.
4456  *
4457  * @root:       the root of the fs/file tree
4458  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4459  *              value while find index
4460  * @location_key: location key of the struct btrfs_dir_item to match
4461  * @name:       the name to match
4462  * @namelen:    the length of name
4463  * @file_type:  the type of file to math
4464  *
4465  * Return 0 if no error occurred.
4466  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4467  * DIR_ITEM/DIR_INDEX
4468  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4469  * and DIR_ITEM/DIR_INDEX mismatch
4470  */
4471 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4472                          struct btrfs_key *location_key, char *name,
4473                          u32 namelen, u8 file_type)
4474 {
4475         struct btrfs_path path;
4476         struct extent_buffer *node;
4477         struct btrfs_dir_item *di;
4478         struct btrfs_key location;
4479         char namebuf[BTRFS_NAME_LEN] = {0};
4480         u32 total;
4481         u32 cur = 0;
4482         u32 len;
4483         u32 data_len;
4484         u8 filetype;
4485         int slot;
4486         int ret;
4487
4488         /* get the index by traversing all index */
4489         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4490                 ret = find_dir_index(root, key->objectid,
4491                                      location_key->objectid, &key->offset,
4492                                      name, namelen, file_type);
4493                 if (ret)
4494                         ret = DIR_INDEX_MISSING;
4495                 return ret;
4496         }
4497
4498         btrfs_init_path(&path);
4499         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4500         if (ret) {
4501                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4502                         DIR_INDEX_MISSING;
4503                 goto out;
4504         }
4505
4506         /* Check whether inode_id/filetype/name match */
4507         node = path.nodes[0];
4508         slot = path.slots[0];
4509         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4510         total = btrfs_item_size_nr(node, slot);
4511         while (cur < total) {
4512                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4513                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4514
4515                 len = btrfs_dir_name_len(node, di);
4516                 data_len = btrfs_dir_data_len(node, di);
4517
4518                 btrfs_dir_item_key_to_cpu(node, di, &location);
4519                 if (location.objectid != location_key->objectid ||
4520                     location.type != location_key->type ||
4521                     location.offset != location_key->offset)
4522                         goto next;
4523
4524                 filetype = btrfs_dir_type(node, di);
4525                 if (file_type != filetype)
4526                         goto next;
4527
4528                 if (len > BTRFS_NAME_LEN) {
4529                         len = BTRFS_NAME_LEN;
4530                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4531                         root->objectid,
4532                         key->type == BTRFS_DIR_ITEM_KEY ?
4533                         "DIR_ITEM" : "DIR_INDEX",
4534                         key->objectid, key->offset, len);
4535                 }
4536                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4537                                    len);
4538                 if (len != namelen || strncmp(namebuf, name, len))
4539                         goto next;
4540
4541                 ret = 0;
4542                 goto out;
4543 next:
4544                 len += sizeof(*di) + data_len;
4545                 di = (struct btrfs_dir_item *)((char *)di + len);
4546                 cur += len;
4547         }
4548
4549 out:
4550         btrfs_release_path(&path);
4551         return ret;
4552 }
4553
4554 /*
4555  * Prints inode ref error message
4556  */
4557 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4558                                 u64 index, const char *namebuf, int name_len,
4559                                 u8 filetype, int err)
4560 {
4561         if (!err)
4562                 return;
4563
4564         /* root dir error */
4565         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4566                 error(
4567         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4568                       root->objectid, key->objectid, key->offset, namebuf);
4569                 return;
4570         }
4571
4572         /* normal error */
4573         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4574                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4575                       root->objectid, key->offset,
4576                       btrfs_name_hash(namebuf, name_len),
4577                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4578                       namebuf, filetype);
4579         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4580                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4581                       root->objectid, key->offset, index,
4582                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4583                       namebuf, filetype);
4584 }
4585
4586 /*
4587  * Insert the missing inode item.
4588  *
4589  * Returns 0 means success.
4590  * Returns <0 means error.
4591  */
4592 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4593                                      u8 filetype)
4594 {
4595         struct btrfs_key key;
4596         struct btrfs_trans_handle *trans;
4597         struct btrfs_path path;
4598         int ret;
4599
4600         key.objectid = ino;
4601         key.type = BTRFS_INODE_ITEM_KEY;
4602         key.offset = 0;
4603
4604         btrfs_init_path(&path);
4605         trans = btrfs_start_transaction(root, 1);
4606         if (IS_ERR(trans)) {
4607                 ret = -EIO;
4608                 goto out;
4609         }
4610
4611         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4612         if (ret < 0 || !ret)
4613                 goto fail;
4614
4615         /* insert inode item */
4616         create_inode_item_lowmem(trans, root, ino, filetype);
4617         ret = 0;
4618 fail:
4619         btrfs_commit_transaction(trans, root);
4620 out:
4621         if (ret)
4622                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4623                       root->objectid, ino);
4624         btrfs_release_path(&path);
4625         return ret;
4626 }
4627
4628 /*
4629  * The ternary means dir item, dir index and relative inode ref.
4630  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4631  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4632  * strategy:
4633  * If two of three is missing or mismatched, delete the existing one.
4634  * If one of three is missing or mismatched, add the missing one.
4635  *
4636  * returns 0 means success.
4637  * returns not 0 means on error;
4638  */
4639 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4640                           u64 index, char *name, int name_len, u8 filetype,
4641                           int err)
4642 {
4643         struct btrfs_trans_handle *trans;
4644         int stage = 0;
4645         int ret = 0;
4646
4647         /*
4648          * stage shall be one of following valild values:
4649          *      0: Fine, nothing to do.
4650          *      1: One of three is wrong, so add missing one.
4651          *      2: Two of three is wrong, so delete existed one.
4652          */
4653         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4654                 stage++;
4655         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4656                 stage++;
4657         if (err & (INODE_REF_MISSING))
4658                 stage++;
4659
4660         /* stage must be smllarer than 3 */
4661         ASSERT(stage < 3);
4662
4663         trans = btrfs_start_transaction(root, 1);
4664         if (stage == 2) {
4665                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4666                                    name_len, 0);
4667                 goto out;
4668         }
4669         if (stage == 1) {
4670                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4671                                filetype, &index, 1, 1);
4672                 goto out;
4673         }
4674 out:
4675         btrfs_commit_transaction(trans, root);
4676
4677         if (ret)
4678                 error("fail to repair inode %llu name %s filetype %u",
4679                       ino, name, filetype);
4680         else
4681                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4682                        stage == 2 ? "Delete" : "Add",
4683                        ino, name, filetype);
4684
4685         return ret;
4686 }
4687
4688 /*
4689  * Traverse the given INODE_REF and call find_dir_item() to find related
4690  * DIR_ITEM/DIR_INDEX.
4691  *
4692  * @root:       the root of the fs/file tree
4693  * @ref_key:    the key of the INODE_REF
4694  * @path        the path provides node and slot
4695  * @refs:       the count of INODE_REF
4696  * @mode:       the st_mode of INODE_ITEM
4697  * @name_ret:   returns with the first ref's name
4698  * @name_len_ret:    len of the name_ret
4699  *
4700  * Return 0 if no error occurred.
4701  */
4702 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4703                            struct btrfs_path *path, char *name_ret,
4704                            u32 *namelen_ret, u64 *refs_ret, int mode)
4705 {
4706         struct btrfs_key key;
4707         struct btrfs_key location;
4708         struct btrfs_inode_ref *ref;
4709         struct extent_buffer *node;
4710         char namebuf[BTRFS_NAME_LEN] = {0};
4711         u32 total;
4712         u32 cur = 0;
4713         u32 len;
4714         u32 name_len;
4715         u64 index;
4716         int ret;
4717         int err = 0;
4718         int tmp_err;
4719         int slot;
4720         int need_research = 0;
4721         u64 refs;
4722
4723 begin:
4724         err = 0;
4725         cur = 0;
4726         refs = *refs_ret;
4727
4728         /* since after repair, path and the dir item may be changed */
4729         if (need_research) {
4730                 need_research = 0;
4731                 btrfs_release_path(path);
4732                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4733                 /* the item was deleted, let path point to the last checked item */
4734                 if (ret > 0) {
4735                         if (path->slots[0] == 0)
4736                                 btrfs_prev_leaf(root, path);
4737                         else
4738                                 path->slots[0]--;
4739                 }
4740                 if (ret)
4741                         goto out;
4742         }
4743
4744         location.objectid = ref_key->objectid;
4745         location.type = BTRFS_INODE_ITEM_KEY;
4746         location.offset = 0;
4747         node = path->nodes[0];
4748         slot = path->slots[0];
4749
4750         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4751         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4752         total = btrfs_item_size_nr(node, slot);
4753
4754 next:
4755         /* Update inode ref count */
4756         refs++;
4757         tmp_err = 0;
4758         index = btrfs_inode_ref_index(node, ref);
4759         name_len = btrfs_inode_ref_name_len(node, ref);
4760
4761         if (name_len <= BTRFS_NAME_LEN) {
4762                 len = name_len;
4763         } else {
4764                 len = BTRFS_NAME_LEN;
4765                 warning("root %llu INODE_REF[%llu %llu] name too long",
4766                         root->objectid, ref_key->objectid, ref_key->offset);
4767         }
4768
4769         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4770
4771         /* copy the first name found to name_ret */
4772         if (refs == 1 && name_ret) {
4773                 memcpy(name_ret, namebuf, len);
4774                 *namelen_ret = len;
4775         }
4776
4777         /* Check root dir ref */
4778         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4779                 if (index != 0 || len != strlen("..") ||
4780                     strncmp("..", namebuf, len) ||
4781                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4782                         /* set err bits then repair will delete the ref */
4783                         err |= DIR_INDEX_MISSING;
4784                         err |= DIR_ITEM_MISSING;
4785                 }
4786                 goto end;
4787         }
4788
4789         /* Find related DIR_INDEX */
4790         key.objectid = ref_key->offset;
4791         key.type = BTRFS_DIR_INDEX_KEY;
4792         key.offset = index;
4793         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4794                             imode_to_type(mode));
4795
4796         /* Find related dir_item */
4797         key.objectid = ref_key->offset;
4798         key.type = BTRFS_DIR_ITEM_KEY;
4799         key.offset = btrfs_name_hash(namebuf, len);
4800         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4801                             imode_to_type(mode));
4802 end:
4803         if (tmp_err && repair) {
4804                 ret = repair_ternary_lowmem(root, ref_key->offset,
4805                                             ref_key->objectid, index, namebuf,
4806                                             name_len, imode_to_type(mode),
4807                                             tmp_err);
4808                 if (!ret) {
4809                         need_research = 1;
4810                         goto begin;
4811                 }
4812         }
4813         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4814                             imode_to_type(mode), tmp_err);
4815         err |= tmp_err;
4816         len = sizeof(*ref) + name_len;
4817         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4818         cur += len;
4819         if (cur < total)
4820                 goto next;
4821
4822 out:
4823         *refs_ret = refs;
4824         return err;
4825 }
4826
4827 /*
4828  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4829  * DIR_ITEM/DIR_INDEX.
4830  *
4831  * @root:       the root of the fs/file tree
4832  * @ref_key:    the key of the INODE_EXTREF
4833  * @refs:       the count of INODE_EXTREF
4834  * @mode:       the st_mode of INODE_ITEM
4835  *
4836  * Return 0 if no error occurred.
4837  */
4838 static int check_inode_extref(struct btrfs_root *root,
4839                               struct btrfs_key *ref_key,
4840                               struct extent_buffer *node, int slot, u64 *refs,
4841                               int mode)
4842 {
4843         struct btrfs_key key;
4844         struct btrfs_key location;
4845         struct btrfs_inode_extref *extref;
4846         char namebuf[BTRFS_NAME_LEN] = {0};
4847         u32 total;
4848         u32 cur = 0;
4849         u32 len;
4850         u32 name_len;
4851         u64 index;
4852         u64 parent;
4853         int ret;
4854         int err = 0;
4855
4856         location.objectid = ref_key->objectid;
4857         location.type = BTRFS_INODE_ITEM_KEY;
4858         location.offset = 0;
4859
4860         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4861         total = btrfs_item_size_nr(node, slot);
4862
4863 next:
4864         /* update inode ref count */
4865         (*refs)++;
4866         name_len = btrfs_inode_extref_name_len(node, extref);
4867         index = btrfs_inode_extref_index(node, extref);
4868         parent = btrfs_inode_extref_parent(node, extref);
4869         if (name_len <= BTRFS_NAME_LEN) {
4870                 len = name_len;
4871         } else {
4872                 len = BTRFS_NAME_LEN;
4873                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4874                         root->objectid, ref_key->objectid, ref_key->offset);
4875         }
4876         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4877
4878         /* Check root dir ref name */
4879         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4880                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4881                       root->objectid, ref_key->objectid, ref_key->offset,
4882                       namebuf);
4883                 err |= ROOT_DIR_ERROR;
4884         }
4885
4886         /* find related dir_index */
4887         key.objectid = parent;
4888         key.type = BTRFS_DIR_INDEX_KEY;
4889         key.offset = index;
4890         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4891         err |= ret;
4892
4893         /* find related dir_item */
4894         key.objectid = parent;
4895         key.type = BTRFS_DIR_ITEM_KEY;
4896         key.offset = btrfs_name_hash(namebuf, len);
4897         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4898         err |= ret;
4899
4900         len = sizeof(*extref) + name_len;
4901         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4902         cur += len;
4903
4904         if (cur < total)
4905                 goto next;
4906
4907         return err;
4908 }
4909
4910 /*
4911  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4912  * DIR_ITEM/DIR_INDEX match.
4913  * Return with @index_ret.
4914  *
4915  * @root:       the root of the fs/file tree
4916  * @key:        the key of the INODE_REF/INODE_EXTREF
4917  * @name:       the name in the INODE_REF/INODE_EXTREF
4918  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4919  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4920  *              value (64)-1 means do not check index
4921  * @ext_ref:    the EXTENDED_IREF feature
4922  *
4923  * Return 0 if no error occurred.
4924  * Return >0 for error bitmap
4925  */
4926 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4927                           char *name, int namelen, u64 *index_ret,
4928                           unsigned int ext_ref)
4929 {
4930         struct btrfs_path path;
4931         struct btrfs_inode_ref *ref;
4932         struct btrfs_inode_extref *extref;
4933         struct extent_buffer *node;
4934         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4935         u32 total;
4936         u32 cur = 0;
4937         u32 len;
4938         u32 ref_namelen;
4939         u64 ref_index;
4940         u64 parent;
4941         u64 dir_id;
4942         int slot;
4943         int ret;
4944
4945         ASSERT(index_ret);
4946
4947         btrfs_init_path(&path);
4948         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4949         if (ret) {
4950                 ret = INODE_REF_MISSING;
4951                 goto extref;
4952         }
4953
4954         node = path.nodes[0];
4955         slot = path.slots[0];
4956
4957         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4958         total = btrfs_item_size_nr(node, slot);
4959
4960         /* Iterate all entry of INODE_REF */
4961         while (cur < total) {
4962                 ret = INODE_REF_MISSING;
4963
4964                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
4965                 ref_index = btrfs_inode_ref_index(node, ref);
4966                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
4967                         goto next_ref;
4968
4969                 if (cur + sizeof(*ref) + ref_namelen > total ||
4970                     ref_namelen > BTRFS_NAME_LEN) {
4971                         warning("root %llu INODE %s[%llu %llu] name too long",
4972                                 root->objectid,
4973                                 key->type == BTRFS_INODE_REF_KEY ?
4974                                         "REF" : "EXTREF",
4975                                 key->objectid, key->offset);
4976
4977                         if (cur + sizeof(*ref) > total)
4978                                 break;
4979                         len = min_t(u32, total - cur - sizeof(*ref),
4980                                     BTRFS_NAME_LEN);
4981                 } else {
4982                         len = ref_namelen;
4983                 }
4984
4985                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
4986                                    len);
4987
4988                 if (len != namelen || strncmp(ref_namebuf, name, len))
4989                         goto next_ref;
4990
4991                 *index_ret = ref_index;
4992                 ret = 0;
4993                 goto out;
4994 next_ref:
4995                 len = sizeof(*ref) + ref_namelen;
4996                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
4997                 cur += len;
4998         }
4999
5000 extref:
5001         /* Skip if not support EXTENDED_IREF feature */
5002         if (!ext_ref)
5003                 goto out;
5004
5005         btrfs_release_path(&path);
5006         btrfs_init_path(&path);
5007
5008         dir_id = key->offset;
5009         key->type = BTRFS_INODE_EXTREF_KEY;
5010         key->offset = btrfs_extref_hash(dir_id, name, namelen);
5011
5012         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5013         if (ret) {
5014                 ret = INODE_REF_MISSING;
5015                 goto out;
5016         }
5017
5018         node = path.nodes[0];
5019         slot = path.slots[0];
5020
5021         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5022         cur = 0;
5023         total = btrfs_item_size_nr(node, slot);
5024
5025         /* Iterate all entry of INODE_EXTREF */
5026         while (cur < total) {
5027                 ret = INODE_REF_MISSING;
5028
5029                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5030                 ref_index = btrfs_inode_extref_index(node, extref);
5031                 parent = btrfs_inode_extref_parent(node, extref);
5032                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5033                         goto next_extref;
5034
5035                 if (parent != dir_id)
5036                         goto next_extref;
5037
5038                 if (ref_namelen <= BTRFS_NAME_LEN) {
5039                         len = ref_namelen;
5040                 } else {
5041                         len = BTRFS_NAME_LEN;
5042                         warning("root %llu INODE %s[%llu %llu] name too long",
5043                                 root->objectid,
5044                                 key->type == BTRFS_INODE_REF_KEY ?
5045                                         "REF" : "EXTREF",
5046                                 key->objectid, key->offset);
5047                 }
5048                 read_extent_buffer(node, ref_namebuf,
5049                                    (unsigned long)(extref + 1), len);
5050
5051                 if (len != namelen || strncmp(ref_namebuf, name, len))
5052                         goto next_extref;
5053
5054                 *index_ret = ref_index;
5055                 ret = 0;
5056                 goto out;
5057
5058 next_extref:
5059                 len = sizeof(*extref) + ref_namelen;
5060                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5061                 cur += len;
5062
5063         }
5064 out:
5065         btrfs_release_path(&path);
5066         return ret;
5067 }
5068
5069 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5070                                u64 ino, u64 index, const char *namebuf,
5071                                int name_len, u8 filetype, int err)
5072 {
5073         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5074                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5075                       root->objectid, key->objectid, key->offset, namebuf,
5076                       filetype,
5077                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5078         }
5079
5080         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5081                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5082                       root->objectid, key->objectid, index, namebuf, filetype,
5083                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5084         }
5085
5086         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5087                 error(
5088                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5089                       root->objectid, ino, index, namebuf, filetype,
5090                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5091         }
5092
5093         if (err & INODE_REF_MISSING)
5094                 error(
5095                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5096                       root->objectid, ino, key->objectid, namebuf, filetype);
5097
5098 }
5099
5100 /*
5101  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5102  *
5103  * Returns error after repair
5104  */
5105 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5106                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5107                            int err)
5108 {
5109         int ret;
5110
5111         if (err & INODE_ITEM_MISSING) {
5112                 ret = repair_inode_item_missing(root, ino, filetype);
5113                 if (!ret)
5114                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5115         }
5116
5117         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5118                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5119                                             name_len, filetype, err);
5120                 if (!ret) {
5121                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5122                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5123                         err &= ~(INODE_REF_MISSING);
5124                 }
5125         }
5126         return err;
5127 }
5128
5129 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5130                 u64 *size_ret)
5131 {
5132         struct btrfs_key key;
5133         struct btrfs_path path;
5134         u32 len;
5135         struct btrfs_dir_item *di;
5136         int ret;
5137         int cur = 0;
5138         int total = 0;
5139
5140         ASSERT(size_ret);
5141         *size_ret = 0;
5142
5143         key.objectid = ino;
5144         key.type = type;
5145         key.offset = (u64)-1;
5146
5147         btrfs_init_path(&path);
5148         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5149         if (ret < 0) {
5150                 ret = -EIO;
5151                 goto out;
5152         }
5153         /* if found, go to spacial case */
5154         if (ret == 0)
5155                 goto special_case;
5156
5157 loop:
5158         ret = btrfs_previous_item(root, &path, ino, type);
5159
5160         if (ret) {
5161                 ret = 0;
5162                 goto out;
5163         }
5164
5165 special_case:
5166         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5167         cur = 0;
5168         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5169
5170         while (cur < total) {
5171                 len = btrfs_dir_name_len(path.nodes[0], di);
5172                 if (len > BTRFS_NAME_LEN)
5173                         len = BTRFS_NAME_LEN;
5174                 *size_ret += len;
5175
5176                 len += btrfs_dir_data_len(path.nodes[0], di);
5177                 len += sizeof(*di);
5178                 di = (struct btrfs_dir_item *)((char *)di + len);
5179                 cur += len;
5180         }
5181         goto loop;
5182
5183 out:
5184         btrfs_release_path(&path);
5185         return ret;
5186 }
5187
5188 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5189 {
5190         u64 item_size;
5191         u64 index_size;
5192         int ret;
5193
5194         ASSERT(size);
5195         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5196         if (ret)
5197                 goto out;
5198
5199         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5200         if (ret)
5201                 goto out;
5202
5203         *size = item_size + index_size;
5204
5205 out:
5206         if (ret)
5207                 error("failed to count root %llu INODE[%llu] root size",
5208                       root->objectid, ino);
5209         return ret;
5210 }
5211
5212 /*
5213  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5214  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5215  *
5216  * @root:       the root of the fs/file tree
5217  * @key:        the key of the INODE_REF/INODE_EXTREF
5218  * @path:       the path
5219  * @size:       the st_size of the INODE_ITEM
5220  * @ext_ref:    the EXTENDED_IREF feature
5221  *
5222  * Return 0 if no error occurred.
5223  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5224  */
5225 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5226                           struct btrfs_path *path, u64 *size,
5227                           unsigned int ext_ref)
5228 {
5229         struct btrfs_dir_item *di;
5230         struct btrfs_inode_item *ii;
5231         struct btrfs_key key;
5232         struct btrfs_key location;
5233         struct extent_buffer *node;
5234         int slot;
5235         char namebuf[BTRFS_NAME_LEN] = {0};
5236         u32 total;
5237         u32 cur = 0;
5238         u32 len;
5239         u32 name_len;
5240         u32 data_len;
5241         u8 filetype;
5242         u32 mode = 0;
5243         u64 index;
5244         int ret;
5245         int err;
5246         int tmp_err;
5247         int need_research = 0;
5248
5249         /*
5250          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5251          * ignore index check.
5252          */
5253         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5254                 index = di_key->offset;
5255         else
5256                 index = (u64)-1;
5257 begin:
5258         err = 0;
5259         cur = 0;
5260
5261         /* since after repair, path and the dir item may be changed */
5262         if (need_research) {
5263                 need_research = 0;
5264                 err |= DIR_COUNT_AGAIN;
5265                 btrfs_release_path(path);
5266                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5267                 /* the item was deleted, let path point the last checked item */
5268                 if (ret > 0) {
5269                         if (path->slots[0] == 0)
5270                                 btrfs_prev_leaf(root, path);
5271                         else
5272                                 path->slots[0]--;
5273                 }
5274                 if (ret)
5275                         goto out;
5276         }
5277
5278         node = path->nodes[0];
5279         slot = path->slots[0];
5280
5281         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5282         total = btrfs_item_size_nr(node, slot);
5283         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5284
5285         while (cur < total) {
5286                 data_len = btrfs_dir_data_len(node, di);
5287                 tmp_err = 0;
5288                 if (data_len)
5289                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5290                               root->objectid,
5291               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5292                               di_key->objectid, di_key->offset, data_len);
5293
5294                 name_len = btrfs_dir_name_len(node, di);
5295                 if (name_len <= BTRFS_NAME_LEN) {
5296                         len = name_len;
5297                 } else {
5298                         len = BTRFS_NAME_LEN;
5299                         warning("root %llu %s[%llu %llu] name too long",
5300                                 root->objectid,
5301                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5302                                 di_key->objectid, di_key->offset);
5303                 }
5304                 (*size) += name_len;
5305                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5306                                    len);
5307                 filetype = btrfs_dir_type(node, di);
5308
5309                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5310                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5311                         err |= -EIO;
5312                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5313                         root->objectid, di_key->objectid, di_key->offset,
5314                         namebuf, len, filetype, di_key->offset,
5315                         btrfs_name_hash(namebuf, len));
5316                 }
5317
5318                 btrfs_dir_item_key_to_cpu(node, di, &location);
5319                 /* Ignore related ROOT_ITEM check */
5320                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5321                         goto next;
5322
5323                 btrfs_release_path(path);
5324                 /* Check relative INODE_ITEM(existence/filetype) */
5325                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5326                 if (ret) {
5327                         tmp_err |= INODE_ITEM_MISSING;
5328                         goto next;
5329                 }
5330
5331                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5332                                     struct btrfs_inode_item);
5333                 mode = btrfs_inode_mode(path->nodes[0], ii);
5334                 if (imode_to_type(mode) != filetype) {
5335                         tmp_err |= INODE_ITEM_MISMATCH;
5336                         goto next;
5337                 }
5338
5339                 /* Check relative INODE_REF/INODE_EXTREF */
5340                 key.objectid = location.objectid;
5341                 key.type = BTRFS_INODE_REF_KEY;
5342                 key.offset = di_key->objectid;
5343                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5344                                           &index, ext_ref);
5345
5346                 /* check relative INDEX/ITEM */
5347                 key.objectid = di_key->objectid;
5348                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5349                         key.type = BTRFS_DIR_INDEX_KEY;
5350                         key.offset = index;
5351                 } else {
5352                         key.type = BTRFS_DIR_ITEM_KEY;
5353                         key.offset = btrfs_name_hash(namebuf, name_len);
5354                 }
5355
5356                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5357                                          name_len, filetype);
5358                 /* find_dir_item may find index */
5359                 if (key.type == BTRFS_DIR_INDEX_KEY)
5360                         index = key.offset;
5361 next:
5362
5363                 if (tmp_err && repair) {
5364                         ret = repair_dir_item(root, di_key->objectid,
5365                                               location.objectid, index,
5366                                               imode_to_type(mode), namebuf,
5367                                               name_len, tmp_err);
5368                         if (ret != tmp_err) {
5369                                 need_research = 1;
5370                                 goto begin;
5371                         }
5372                 }
5373                 btrfs_release_path(path);
5374                 print_dir_item_err(root, di_key, location.objectid, index,
5375                                    namebuf, name_len, filetype, tmp_err);
5376                 err |= tmp_err;
5377                 len = sizeof(*di) + name_len + data_len;
5378                 di = (struct btrfs_dir_item *)((char *)di + len);
5379                 cur += len;
5380
5381                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5382                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5383                               root->objectid, di_key->objectid,
5384                               di_key->offset);
5385                         break;
5386                 }
5387         }
5388 out:
5389         /* research path */
5390         btrfs_release_path(path);
5391         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5392         if (ret)
5393                 err |= ret > 0 ? -ENOENT : ret;
5394         return err;
5395 }
5396
5397 /*
5398  * Wrapper function of btrfs_punch_hole.
5399  *
5400  * Returns 0 means success.
5401  * Returns not 0 means error.
5402  */
5403 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5404                              u64 len)
5405 {
5406         struct btrfs_trans_handle *trans;
5407         int ret = 0;
5408
5409         trans = btrfs_start_transaction(root, 1);
5410         if (IS_ERR(trans))
5411                 return PTR_ERR(trans);
5412
5413         ret = btrfs_punch_hole(trans, root, ino, start, len);
5414         if (ret)
5415                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5416                       start, len, ino);
5417         else
5418                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5419                        ino);
5420
5421         btrfs_commit_transaction(trans, root);
5422         return ret;
5423 }
5424
5425 /*
5426  * Check file extent datasum/hole, update the size of the file extents,
5427  * check and update the last offset of the file extent.
5428  *
5429  * @root:       the root of fs/file tree.
5430  * @fkey:       the key of the file extent.
5431  * @nodatasum:  INODE_NODATASUM feature.
5432  * @size:       the sum of all EXTENT_DATA items size for this inode.
5433  * @end:        the offset of the last extent.
5434  *
5435  * Return 0 if no error occurred.
5436  */
5437 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5438                              struct extent_buffer *node, int slot,
5439                              unsigned int nodatasum, u64 *size, u64 *end)
5440 {
5441         struct btrfs_file_extent_item *fi;
5442         u64 disk_bytenr;
5443         u64 disk_num_bytes;
5444         u64 extent_num_bytes;
5445         u64 extent_offset;
5446         u64 csum_found;         /* In byte size, sectorsize aligned */
5447         u64 search_start;       /* Logical range start we search for csum */
5448         u64 search_len;         /* Logical range len we search for csum */
5449         unsigned int extent_type;
5450         unsigned int is_hole;
5451         int compressed = 0;
5452         int ret;
5453         int err = 0;
5454
5455         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5456
5457         /* Check inline extent */
5458         extent_type = btrfs_file_extent_type(node, fi);
5459         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5460                 struct btrfs_item *e = btrfs_item_nr(slot);
5461                 u32 item_inline_len;
5462
5463                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5464                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5465                 compressed = btrfs_file_extent_compression(node, fi);
5466                 if (extent_num_bytes == 0) {
5467                         error(
5468                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5469                                 root->objectid, fkey->objectid, fkey->offset);
5470                         err |= FILE_EXTENT_ERROR;
5471                 }
5472                 if (!compressed && extent_num_bytes != item_inline_len) {
5473                         error(
5474                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5475                                 root->objectid, fkey->objectid, fkey->offset,
5476                                 extent_num_bytes, item_inline_len);
5477                         err |= FILE_EXTENT_ERROR;
5478                 }
5479                 *end += extent_num_bytes;
5480                 *size += extent_num_bytes;
5481                 return err;
5482         }
5483
5484         /* Check extent type */
5485         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5486                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5487                 err |= FILE_EXTENT_ERROR;
5488                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5489                       root->objectid, fkey->objectid, fkey->offset);
5490                 return err;
5491         }
5492
5493         /* Check REG_EXTENT/PREALLOC_EXTENT */
5494         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5495         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5496         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5497         extent_offset = btrfs_file_extent_offset(node, fi);
5498         compressed = btrfs_file_extent_compression(node, fi);
5499         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5500
5501         /*
5502          * Check EXTENT_DATA csum
5503          *
5504          * For plain (uncompressed) extent, we should only check the range
5505          * we're referring to, as it's possible that part of prealloc extent
5506          * has been written, and has csum:
5507          *
5508          * |<--- Original large preallocated extent A ---->|
5509          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5510          *      No csum                         Has csum
5511          *
5512          * For compressed extent, we should check the whole range.
5513          */
5514         if (!compressed) {
5515                 search_start = disk_bytenr + extent_offset;
5516                 search_len = extent_num_bytes;
5517         } else {
5518                 search_start = disk_bytenr;
5519                 search_len = disk_num_bytes;
5520         }
5521         ret = count_csum_range(root, search_start, search_len, &csum_found);
5522         if (csum_found > 0 && nodatasum) {
5523                 err |= ODD_CSUM_ITEM;
5524                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5525                       root->objectid, fkey->objectid, fkey->offset);
5526         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5527                    !is_hole && (ret < 0 || csum_found < search_len)) {
5528                 err |= CSUM_ITEM_MISSING;
5529                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5530                       root->objectid, fkey->objectid, fkey->offset,
5531                       csum_found, search_len);
5532         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5533                 err |= ODD_CSUM_ITEM;
5534                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5535                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5536         }
5537
5538         /* Check EXTENT_DATA hole */
5539         if (!no_holes && *end != fkey->offset) {
5540                 if (repair)
5541                         ret = punch_extent_hole(root, fkey->objectid,
5542                                                 *end, fkey->offset - *end);
5543                 if (!repair || ret) {
5544                         err |= FILE_EXTENT_ERROR;
5545                         error(
5546 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5547                                 root->objectid, fkey->objectid, fkey->offset,
5548                                 fkey->objectid, *end);
5549                 }
5550         }
5551
5552         *end += extent_num_bytes;
5553         if (!is_hole)
5554                 *size += extent_num_bytes;
5555
5556         return err;
5557 }
5558
5559 /*
5560  * Set inode item nbytes to @nbytes
5561  *
5562  * Returns  0     on success
5563  * Returns  != 0  on error
5564  */
5565 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5566                                       struct btrfs_path *path,
5567                                       u64 ino, u64 nbytes)
5568 {
5569         struct btrfs_trans_handle *trans;
5570         struct btrfs_inode_item *ii;
5571         struct btrfs_key key;
5572         struct btrfs_key research_key;
5573         int err = 0;
5574         int ret;
5575
5576         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5577
5578         key.objectid = ino;
5579         key.type = BTRFS_INODE_ITEM_KEY;
5580         key.offset = 0;
5581
5582         trans = btrfs_start_transaction(root, 1);
5583         if (IS_ERR(trans)) {
5584                 ret = PTR_ERR(trans);
5585                 err |= ret;
5586                 goto out;
5587         }
5588
5589         btrfs_release_path(path);
5590         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5591         if (ret > 0)
5592                 ret = -ENOENT;
5593         if (ret) {
5594                 err |= ret;
5595                 goto fail;
5596         }
5597
5598         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5599                             struct btrfs_inode_item);
5600         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5601         btrfs_mark_buffer_dirty(path->nodes[0]);
5602 fail:
5603         btrfs_commit_transaction(trans, root);
5604 out:
5605         if (ret)
5606                 error("failed to set nbytes in inode %llu root %llu",
5607                       ino, root->root_key.objectid);
5608         else
5609                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5610                        root->root_key.objectid, nbytes);
5611
5612         /* research path */
5613         btrfs_release_path(path);
5614         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5615         err |= ret;
5616
5617         return err;
5618 }
5619
5620 /*
5621  * Set directory inode isize to @isize.
5622  *
5623  * Returns 0     on success.
5624  * Returns != 0  on error.
5625  */
5626 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5627                                    struct btrfs_path *path,
5628                                    u64 ino, u64 isize)
5629 {
5630         struct btrfs_trans_handle *trans;
5631         struct btrfs_inode_item *ii;
5632         struct btrfs_key key;
5633         struct btrfs_key research_key;
5634         int ret;
5635         int err = 0;
5636
5637         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5638
5639         key.objectid = ino;
5640         key.type = BTRFS_INODE_ITEM_KEY;
5641         key.offset = 0;
5642
5643         trans = btrfs_start_transaction(root, 1);
5644         if (IS_ERR(trans)) {
5645                 ret = PTR_ERR(trans);
5646                 err |= ret;
5647                 goto out;
5648         }
5649
5650         btrfs_release_path(path);
5651         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5652         if (ret > 0)
5653                 ret = -ENOENT;
5654         if (ret) {
5655                 err |= ret;
5656                 goto fail;
5657         }
5658
5659         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5660                             struct btrfs_inode_item);
5661         btrfs_set_inode_size(path->nodes[0], ii, isize);
5662         btrfs_mark_buffer_dirty(path->nodes[0]);
5663 fail:
5664         btrfs_commit_transaction(trans, root);
5665 out:
5666         if (ret)
5667                 error("failed to set isize in inode %llu root %llu",
5668                       ino, root->root_key.objectid);
5669         else
5670                 printf("Set isize in inode %llu root %llu to %llu\n",
5671                        ino, root->root_key.objectid, isize);
5672
5673         btrfs_release_path(path);
5674         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5675         err |= ret;
5676
5677         return err;
5678 }
5679
5680 /*
5681  * Wrapper function for btrfs_add_orphan_item().
5682  *
5683  * Returns 0     on success.
5684  * Returns != 0  on error.
5685  */
5686 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5687                                            struct btrfs_path *path, u64 ino)
5688 {
5689         struct btrfs_trans_handle *trans;
5690         struct btrfs_key research_key;
5691         int ret;
5692         int err = 0;
5693
5694         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5695
5696         trans = btrfs_start_transaction(root, 1);
5697         if (IS_ERR(trans)) {
5698                 ret = PTR_ERR(trans);
5699                 err |= ret;
5700                 goto out;
5701         }
5702
5703         btrfs_release_path(path);
5704         ret = btrfs_add_orphan_item(trans, root, path, ino);
5705         err |= ret;
5706         btrfs_commit_transaction(trans, root);
5707 out:
5708         if (ret)
5709                 error("failed to add inode %llu as orphan item root %llu",
5710                       ino, root->root_key.objectid);
5711         else
5712                 printf("Added inode %llu as orphan item root %llu\n",
5713                        ino, root->root_key.objectid);
5714
5715         btrfs_release_path(path);
5716         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5717         err |= ret;
5718
5719         return err;
5720 }
5721
5722 /* Set inode_item nlink to @ref_count.
5723  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5724  *
5725  * Returns 0 on success
5726  */
5727 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5728                                       struct btrfs_path *path, u64 ino,
5729                                       const char *name, u32 namelen,
5730                                       u64 ref_count, u8 filetype, u64 *nlink)
5731 {
5732         struct btrfs_trans_handle *trans;
5733         struct btrfs_inode_item *ii;
5734         struct btrfs_key key;
5735         struct btrfs_key old_key;
5736         char namebuf[BTRFS_NAME_LEN] = {0};
5737         int name_len;
5738         int ret;
5739         int ret2;
5740
5741         /* save the key */
5742         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5743
5744         if (name && namelen) {
5745                 ASSERT(namelen <= BTRFS_NAME_LEN);
5746                 memcpy(namebuf, name, namelen);
5747                 name_len = namelen;
5748         } else {
5749                 sprintf(namebuf, "%llu", ino);
5750                 name_len = count_digits(ino);
5751                 printf("Can't find file name for inode %llu, use %s instead\n",
5752                        ino, namebuf);
5753         }
5754
5755         trans = btrfs_start_transaction(root, 1);
5756         if (IS_ERR(trans)) {
5757                 ret = PTR_ERR(trans);
5758                 goto out;
5759         }
5760
5761         btrfs_release_path(path);
5762         /* if refs is 0, put it into lostfound */
5763         if (ref_count == 0) {
5764                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5765                                               name_len, filetype, &ref_count);
5766                 if (ret)
5767                         goto fail;
5768         }
5769
5770         /* reset inode_item's nlink to ref_count */
5771         key.objectid = ino;
5772         key.type = BTRFS_INODE_ITEM_KEY;
5773         key.offset = 0;
5774
5775         btrfs_release_path(path);
5776         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5777         if (ret > 0)
5778                 ret = -ENOENT;
5779         if (ret)
5780                 goto fail;
5781
5782         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5783                             struct btrfs_inode_item);
5784         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5785         btrfs_mark_buffer_dirty(path->nodes[0]);
5786
5787         if (nlink)
5788                 *nlink = ref_count;
5789 fail:
5790         btrfs_commit_transaction(trans, root);
5791 out:
5792         if (ret)
5793                 error(
5794         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5795                        root->objectid, ino, namebuf, filetype);
5796         else
5797                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5798                        root->objectid, ino, namebuf, filetype);
5799
5800         /* research */
5801         btrfs_release_path(path);
5802         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5803         if (ret2 < 0)
5804                 return ret |= ret2;
5805         return ret;
5806 }
5807
/*
 * Check INODE_ITEM and related ITEMs (the same inode number)
 * 1. check link count
 * 2. check inode ref/extref
 * 3. check dir item/index
 *
 * The item @path points at is read as the INODE_ITEM (unless its objectid is
 * BTRFS_ORPHAN_OBJECTID, which is simply skipped).  @path is then advanced
 * with btrfs_next_item() over every following item with the same objectid,
 * each one being handed to the matching check_*() helper.  Afterwards the
 * collected statistics are compared with the nlink/isize/nbytes stored in
 * the inode item; when the global 'repair' flag is set, mismatches are fixed
 * through the repair_*_lowmem() helpers and punch_extent_hole().
 *
 * @root:       the fs/file tree being checked
 * @path:       current position in @root, advanced by this function
 * @ext_ref:    the EXTENDED_IREF feature
 *
 * Return 0 if no error occurred.
 * Return >0 for error or hit the traversal is done(by error bitmap)
 */
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
                            unsigned int ext_ref)
{
        struct extent_buffer *node;
        struct btrfs_inode_item *ii;
        struct btrfs_key key;
        struct btrfs_key last_key;
        u64 inode_id;
        u32 mode;
        u64 nlink;
        u64 nbytes;
        u64 isize;
        u64 size = 0;
        u64 refs = 0;
        u64 extent_end = 0;
        u64 extent_size = 0;
        unsigned int dir;
        unsigned int nodatasum;
        int slot;
        int ret;
        int err = 0;
        char namebuf[BTRFS_NAME_LEN] = {0};
        u32 name_len = 0;

        node = path->nodes[0];
        slot = path->slots[0];

        btrfs_item_key_to_cpu(node, &key, slot);
        inode_id = key.objectid;

        /* Items of the orphan objectid are not inodes, just step over them */
        if (inode_id == BTRFS_ORPHAN_OBJECTID) {
                ret = btrfs_next_item(root, path);
                if (ret > 0)
                        err |= LAST_ITEM;
                return err;
        }

        /* Cache the values stored in the inode item for the final comparison */
        ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
        isize = btrfs_inode_size(node, ii);
        nbytes = btrfs_inode_nbytes(node, ii);
        mode = btrfs_inode_mode(node, ii);
        dir = imode_to_type(mode) == BTRFS_FT_DIR;
        nlink = btrfs_inode_nlink(node, ii);
        nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;

        /* Walk all following items that belong to the same inode */
        while (1) {
                /* Remember the current key in case the walk falls off the tree */
                btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
                ret = btrfs_next_item(root, path);
                if (ret < 0) {
                        /* out will fill 'err' using current statistics */
                        goto out;
                } else if (ret > 0) {
                        err |= LAST_ITEM;
                        goto out;
                }

                node = path->nodes[0];
                slot = path->slots[0];
                btrfs_item_key_to_cpu(node, &key, slot);
                /* First item of another objectid: this inode is done */
                if (key.objectid != inode_id)
                        goto out;

                switch (key.type) {
                case BTRFS_INODE_REF_KEY:
                        /* Counts references in 'refs' and reports a name */
                        ret = check_inode_ref(root, &key, path, namebuf,
                                              &name_len, &refs, mode);
                        err |= ret;
                        break;
                case BTRFS_INODE_EXTREF_KEY:
                        /*
                         * The key type is already known here, so only the
                         * feature flag decides whether the warning is shown.
                         */
                        if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
                                warning("root %llu EXTREF[%llu %llu] isn't supported",
                                        root->objectid, key.objectid,
                                        key.offset);
                        ret = check_inode_extref(root, &key, node, slot, &refs,
                                                 mode);
                        err |= ret;
                        break;
                case BTRFS_DIR_ITEM_KEY:
                case BTRFS_DIR_INDEX_KEY:
                        /* Only warned about; the item is still checked */
                        if (!dir) {
                                warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
                                        root->objectid, inode_id,
                                        imode_to_type(mode), key.objectid,
                                        key.offset);
                        }
                        ret = check_dir_item(root, &key, path, &size, ext_ref);
                        err |= ret;
                        break;
                case BTRFS_EXTENT_DATA_KEY:
                        /* Only warned about; the extent is still checked */
                        if (dir) {
                                warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
                                        root->objectid, inode_id, key.objectid,
                                        key.offset);
                        }
                        ret = check_file_extent(root, &key, node, slot,
                                                nodatasum, &extent_size,
                                                &extent_end);
                        err |= ret;
                        break;
                case BTRFS_XATTR_ITEM_KEY:
                        /* Nothing is verified for xattr items here */
                        break;
                default:
                        /* Unknown types are only reported, 'err' is unchanged */
                        error("ITEM[%llu %u %llu] UNKNOWN TYPE",
                              key.objectid, key.type, key.offset);
                }
        }

out:
        /*
         * The walk ran past the last item of the tree.  Seek back to the last
         * key seen; the repair helpers below read the current key from @path.
         */
        if (err & LAST_ITEM) {
                btrfs_release_path(path);
                ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
                if (ret)
                        return err;
        }

        /* verify INODE_ITEM nlink/isize/nbytes */
        if (dir) {
                /* A dir item check asked for the directory size to be recounted */
                if (repair && (err & DIR_COUNT_AGAIN)) {
                        err &= ~DIR_COUNT_AGAIN;
                        count_dir_isize(root, inode_id, &size);
                }

                /* The result is not checked here; 'nlink' is re-tested below */
                if ((nlink != 1 || refs != 1) && repair) {
                        ret = repair_inode_nlinks_lowmem(root, path, inode_id,
                                namebuf, name_len, refs, imode_to_type(mode),
                                &nlink);
                }

                if (nlink != 1) {
                        err |= LINK_COUNT_ERROR;
                        error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
                              root->objectid, inode_id, nlink);
                }

                /*
                 * Just a warning, as dir inode nbytes is just an
                 * instructive value.
                 */
                if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
                        warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
                                root->objectid, inode_id,
                                root->fs_info->nodesize);
                }

                /* 'size' was accumulated by check_dir_item()/count_dir_isize() */
                if (isize != size) {
                        if (repair)
                                ret = repair_dir_isize_lowmem(root, path,
                                                              inode_id, size);
                        if (!repair || ret) {
                                err |= ISIZE_ERROR;
                                error(
                "root %llu DIR INODE [%llu] size %llu not equal to %llu",
                                      root->objectid, inode_id, isize, size);
                        }
                }
        } else {
                if (nlink != refs) {
                        if (repair)
                                ret = repair_inode_nlinks_lowmem(root, path,
                                         inode_id, namebuf, name_len, refs,
                                         imode_to_type(mode), &nlink);
                        if (!repair || ret) {
                                err |= LINK_COUNT_ERROR;
                                error(
                "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
                                      root->objectid, inode_id, nlink, refs);
                        }
                } else if (!nlink) {
                        /* nlink == refs == 0: the inode is an orphan */
                        if (repair)
                                ret = repair_inode_orphan_item_lowmem(root,
                                                              path, inode_id);
                        if (!repair || ret) {
                                err |= ORPHAN_ITEM;
                                error("root %llu INODE[%llu] is orphan item",
                                      root->objectid, inode_id);
                        }
                }

                /* Extents end before isize and nbytes is 0: a hole item is expected */
                if (!nbytes && !no_holes && extent_end < isize) {
                        if (repair)
                                ret = punch_extent_hole(root, inode_id,
                                                extent_end, isize - extent_end);
                        if (!repair || ret) {
                                err |= NBYTES_ERROR;
                                error(
        "root %llu INODE[%llu] size %llu should have a file extent hole",
                                      root->objectid, inode_id, isize);
                        }
                }

                /* 'extent_size' was accumulated by check_file_extent() */
                if (nbytes != extent_size) {
                        if (repair)
                                ret = repair_inode_nbytes_lowmem(root, path,
                                                         inode_id, extent_size);
                        if (!repair || ret) {
                                err |= NBYTES_ERROR;
                                error(
        "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
                                      root->objectid, inode_id, nbytes,
                                      extent_size);
                        }
                }
        }

        /* Step forward again from the position restored to 'last_key' above */
        if (err & LAST_ITEM)
                btrfs_next_item(root, path);
        return err;
}
6027
6028 /*
6029  * Insert the missing inode item and inode ref.
6030  *
6031  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6032  * Root dir should be handled specially because root dir is the root of fs.
6033  *
6034  * returns err (>0 or 0) after repair
6035  */
6036 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6037 {
6038         struct btrfs_trans_handle *trans;
6039         struct btrfs_key key;
6040         struct btrfs_path path;
6041         int filetype = BTRFS_FT_DIR;
6042         int ret = 0;
6043
6044         btrfs_init_path(&path);
6045
6046         if (err & INODE_REF_MISSING) {
6047                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6048                 key.type = BTRFS_INODE_REF_KEY;
6049                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6050
6051                 trans = btrfs_start_transaction(root, 1);
6052                 if (IS_ERR(trans)) {
6053                         ret = PTR_ERR(trans);
6054                         goto out;
6055                 }
6056
6057                 btrfs_release_path(&path);
6058                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6059                 if (ret)
6060                         goto trans_fail;
6061
6062                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6063                                              BTRFS_FIRST_FREE_OBJECTID,
6064                                              BTRFS_FIRST_FREE_OBJECTID, 0);
6065                 if (ret)
6066                         goto trans_fail;
6067
6068                 printf("Add INODE_REF[%llu %llu] name %s\n",
6069                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6070                        "..");
6071                 err &= ~INODE_REF_MISSING;
6072 trans_fail:
6073                 if (ret)
6074                         error("fail to insert first inode's ref");
6075                 btrfs_commit_transaction(trans, root);
6076         }
6077
6078         if (err & INODE_ITEM_MISSING) {
6079                 ret = repair_inode_item_missing(root,
6080                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6081                 if (ret)
6082                         goto out;
6083                 err &= ~INODE_ITEM_MISSING;
6084         }
6085 out:
6086         if (ret)
6087                 error("fail to repair first inode");
6088         btrfs_release_path(&path);
6089         return err;
6090 }
6091
6092 /*
6093  * check first root dir's inode_item and inode_ref
6094  *
6095  * returns 0 means no error
6096  * returns >0 means error
6097  * returns <0 means fatal error
6098  */
6099 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6100 {
6101         struct btrfs_path path;
6102         struct btrfs_key key;
6103         struct btrfs_inode_item *ii;
6104         u64 index;
6105         u32 mode;
6106         int err = 0;
6107         int ret;
6108
6109         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6110         key.type = BTRFS_INODE_ITEM_KEY;
6111         key.offset = 0;
6112
6113         /* For root being dropped, we don't need to check first inode */
6114         if (btrfs_root_refs(&root->root_item) == 0 &&
6115             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6116             BTRFS_FIRST_FREE_OBJECTID)
6117                 return 0;
6118
6119         btrfs_init_path(&path);
6120         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6121         if (ret < 0)
6122                 goto out;
6123         if (ret > 0) {
6124                 ret = 0;
6125                 err |= INODE_ITEM_MISSING;
6126         } else {
6127                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6128                                     struct btrfs_inode_item);
6129                 mode = btrfs_inode_mode(path.nodes[0], ii);
6130                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6131                         err |= INODE_ITEM_MISMATCH;
6132         }
6133
6134         /* lookup first inode ref */
6135         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6136         key.type = BTRFS_INODE_REF_KEY;
6137         /* special index value */
6138         index = 0;
6139
6140         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6141         if (ret < 0)
6142                 goto out;
6143         err |= ret;
6144
6145 out:
6146         btrfs_release_path(&path);
6147
6148         if (err && repair)
6149                 err = repair_fs_first_inode(root, err);
6150
6151         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6152                 error("root dir INODE_ITEM is %s",
6153                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6154         if (err & INODE_REF_MISSING)
6155                 error("root dir INODE_REF is missing");
6156
6157         return ret < 0 ? ret : err;
6158 }
6159
6160 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6161                                                 u64 parent, u64 root)
6162 {
6163         struct rb_node *node;
6164         struct tree_backref *back = NULL;
6165         struct tree_backref match = {
6166                 .node = {
6167                         .is_data = 0,
6168                 },
6169         };
6170
6171         if (parent) {
6172                 match.parent = parent;
6173                 match.node.full_backref = 1;
6174         } else {
6175                 match.root = root;
6176         }
6177
6178         node = rb_search(&rec->backref_tree, &match.node.node,
6179                          (rb_compare_keys)compare_extent_backref, NULL);
6180         if (node)
6181                 back = to_tree_backref(rb_node_to_extent_backref(node));
6182
6183         return back;
6184 }
6185
6186 static struct data_backref *find_data_backref(struct extent_record *rec,
6187                                                 u64 parent, u64 root,
6188                                                 u64 owner, u64 offset,
6189                                                 int found_ref,
6190                                                 u64 disk_bytenr, u64 bytes)
6191 {
6192         struct rb_node *node;
6193         struct data_backref *back = NULL;
6194         struct data_backref match = {
6195                 .node = {
6196                         .is_data = 1,
6197                 },
6198                 .owner = owner,
6199                 .offset = offset,
6200                 .bytes = bytes,
6201                 .found_ref = found_ref,
6202                 .disk_bytenr = disk_bytenr,
6203         };
6204
6205         if (parent) {
6206                 match.parent = parent;
6207                 match.node.full_backref = 1;
6208         } else {
6209                 match.root = root;
6210         }
6211
6212         node = rb_search(&rec->backref_tree, &match.node.node,
6213                          (rb_compare_keys)compare_extent_backref, NULL);
6214         if (node)
6215                 back = to_data_backref(rb_node_to_extent_backref(node));
6216
6217         return back;
6218 }
6219 /*
6220  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6221  * blocks and integrity of fs tree items.
6222  *
6223  * @root:         the root of the tree to be checked.
6224  * @ext_ref       feature EXTENDED_IREF is enable or not.
6225  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6226  *                otherwise means check fs tree(s) items relationship and
6227  *                @root MUST be a fs tree root.
6228  * Returns 0      represents OK.
6229  * Returns not 0  represents error.
6230  */
6231 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6232                             struct btrfs_root *root, unsigned int ext_ref,
6233                             int check_all)
6234
6235 {
6236         struct btrfs_path path;
6237         struct node_refs nrefs;
6238         struct btrfs_root_item *root_item = &root->root_item;
6239         int ret;
6240         int level;
6241         int err = 0;
6242
6243         memset(&nrefs, 0, sizeof(nrefs));
6244         if (!check_all) {
6245                 /*
6246                  * We need to manually check the first inode item (256)
6247                  * As the following traversal function will only start from
6248                  * the first inode item in the leaf, if inode item (256) is
6249                  * missing we will skip it forever.
6250                  */
6251                 ret = check_fs_first_inode(root, ext_ref);
6252                 if (ret < 0)
6253                         return ret;
6254         }
6255
6256
6257         level = btrfs_header_level(root->node);
6258         btrfs_init_path(&path);
6259
6260         if (btrfs_root_refs(root_item) > 0 ||
6261             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6262                 path.nodes[level] = root->node;
6263                 path.slots[level] = 0;
6264                 extent_buffer_get(root->node);
6265         } else {
6266                 struct btrfs_key key;
6267
6268                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6269                 level = root_item->drop_level;
6270                 path.lowest_level = level;
6271                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6272                 if (ret < 0)
6273                         goto out;
6274                 ret = 0;
6275         }
6276
6277         while (1) {
6278                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6279                                         ext_ref, check_all);
6280
6281                 err |= !!ret;
6282
6283                 /* if ret is negative, walk shall stop */
6284                 if (ret < 0) {
6285                         ret = err;
6286                         break;
6287                 }
6288
6289                 ret = walk_up_tree_v2(root, &path, &level);
6290                 if (ret != 0) {
6291                         /* Normal exit, reset ret to err */
6292                         ret = err;
6293                         break;
6294                 }
6295         }
6296
6297 out:
6298         btrfs_release_path(&path);
6299         return ret;
6300 }
6301
6302 /*
6303  * Iterate all items in the tree and call check_inode_item() to check.
6304  *
6305  * @root:       the root of the tree to be checked.
6306  * @ext_ref:    the EXTENDED_IREF feature
6307  *
6308  * Return 0 if no error found.
6309  * Return <0 for error.
6310  */
6311 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6312 {
6313         reset_cached_block_groups(root->fs_info);
6314         return check_btrfs_root(NULL, root, ext_ref, 0);
6315 }
6316
6317 /*
6318  * Find the relative ref for root_ref and root_backref.
6319  *
6320  * @root:       the root of the root tree.
6321  * @ref_key:    the key of the root ref.
6322  *
6323  * Return 0 if no error occurred.
6324  */
6325 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6326                           struct extent_buffer *node, int slot)
6327 {
6328         struct btrfs_path path;
6329         struct btrfs_key key;
6330         struct btrfs_root_ref *ref;
6331         struct btrfs_root_ref *backref;
6332         char ref_name[BTRFS_NAME_LEN] = {0};
6333         char backref_name[BTRFS_NAME_LEN] = {0};
6334         u64 ref_dirid;
6335         u64 ref_seq;
6336         u32 ref_namelen;
6337         u64 backref_dirid;
6338         u64 backref_seq;
6339         u32 backref_namelen;
6340         u32 len;
6341         int ret;
6342         int err = 0;
6343
6344         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6345         ref_dirid = btrfs_root_ref_dirid(node, ref);
6346         ref_seq = btrfs_root_ref_sequence(node, ref);
6347         ref_namelen = btrfs_root_ref_name_len(node, ref);
6348
6349         if (ref_namelen <= BTRFS_NAME_LEN) {
6350                 len = ref_namelen;
6351         } else {
6352                 len = BTRFS_NAME_LEN;
6353                 warning("%s[%llu %llu] ref_name too long",
6354                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6355                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6356                         ref_key->offset);
6357         }
6358         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6359
6360         /* Find relative root_ref */
6361         key.objectid = ref_key->offset;
6362         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6363         key.offset = ref_key->objectid;
6364
6365         btrfs_init_path(&path);
6366         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6367         if (ret) {
6368                 err |= ROOT_REF_MISSING;
6369                 error("%s[%llu %llu] couldn't find relative ref",
6370                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6371                       "ROOT_REF" : "ROOT_BACKREF",
6372                       ref_key->objectid, ref_key->offset);
6373                 goto out;
6374         }
6375
6376         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6377                                  struct btrfs_root_ref);
6378         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6379         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6380         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6381
6382         if (backref_namelen <= BTRFS_NAME_LEN) {
6383                 len = backref_namelen;
6384         } else {
6385                 len = BTRFS_NAME_LEN;
6386                 warning("%s[%llu %llu] ref_name too long",
6387                         key.type == BTRFS_ROOT_REF_KEY ?
6388                         "ROOT_REF" : "ROOT_BACKREF",
6389                         key.objectid, key.offset);
6390         }
6391         read_extent_buffer(path.nodes[0], backref_name,
6392                            (unsigned long)(backref + 1), len);
6393
6394         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6395             ref_namelen != backref_namelen ||
6396             strncmp(ref_name, backref_name, len)) {
6397                 err |= ROOT_REF_MISMATCH;
6398                 error("%s[%llu %llu] mismatch relative ref",
6399                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6400                       "ROOT_REF" : "ROOT_BACKREF",
6401                       ref_key->objectid, ref_key->offset);
6402         }
6403 out:
6404         btrfs_release_path(&path);
6405         return err;
6406 }
6407
6408 /*
6409  * Check all fs/file tree in low_memory mode.
6410  *
6411  * 1. for fs tree root item, call check_fs_root_v2()
6412  * 2. for fs tree root ref/backref, call check_root_ref()
6413  *
6414  * Return 0 if no error occurred.
6415  */
6416 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6417 {
6418         struct btrfs_root *tree_root = fs_info->tree_root;
6419         struct btrfs_root *cur_root = NULL;
6420         struct btrfs_path path;
6421         struct btrfs_key key;
6422         struct extent_buffer *node;
6423         unsigned int ext_ref;
6424         int slot;
6425         int ret;
6426         int err = 0;
6427
6428         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6429
6430         btrfs_init_path(&path);
6431         key.objectid = BTRFS_FS_TREE_OBJECTID;
6432         key.offset = 0;
6433         key.type = BTRFS_ROOT_ITEM_KEY;
6434
6435         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6436         if (ret < 0) {
6437                 err = ret;
6438                 goto out;
6439         } else if (ret > 0) {
6440                 err = -ENOENT;
6441                 goto out;
6442         }
6443
6444         while (1) {
6445                 node = path.nodes[0];
6446                 slot = path.slots[0];
6447                 btrfs_item_key_to_cpu(node, &key, slot);
6448                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6449                         goto out;
6450                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6451                     fs_root_objectid(key.objectid)) {
6452                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6453                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6454                                                                        &key);
6455                         } else {
6456                                 key.offset = (u64)-1;
6457                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6458                         }
6459
6460                         if (IS_ERR(cur_root)) {
6461                                 error("Fail to read fs/subvol tree: %lld",
6462                                       key.objectid);
6463                                 err = -EIO;
6464                                 goto next;
6465                         }
6466
6467                         ret = check_fs_root_v2(cur_root, ext_ref);
6468                         err |= ret;
6469
6470                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6471                                 btrfs_free_fs_root(cur_root);
6472                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6473                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6474                         ret = check_root_ref(tree_root, &key, node, slot);
6475                         err |= ret;
6476                 }
6477 next:
6478                 ret = btrfs_next_item(tree_root, &path);
6479                 if (ret > 0)
6480                         goto out;
6481                 if (ret < 0) {
6482                         err = ret;
6483                         goto out;
6484                 }
6485         }
6486
6487 out:
6488         btrfs_release_path(&path);
6489         return err;
6490 }
6491
6492 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6493                           struct cache_tree *root_cache)
6494 {
6495         int ret;
6496
6497         if (!ctx.progress_enabled)
6498                 fprintf(stderr, "checking fs roots\n");
6499         if (check_mode == CHECK_MODE_LOWMEM)
6500                 ret = check_fs_roots_v2(fs_info);
6501         else
6502                 ret = check_fs_roots(fs_info, root_cache);
6503
6504         return ret;
6505 }
6506
6507 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6508 {
6509         struct extent_backref *back, *tmp;
6510         struct tree_backref *tback;
6511         struct data_backref *dback;
6512         u64 found = 0;
6513         int err = 0;
6514
6515         rbtree_postorder_for_each_entry_safe(back, tmp,
6516                                              &rec->backref_tree, node) {
6517                 if (!back->found_extent_tree) {
6518                         err = 1;
6519                         if (!print_errs)
6520                                 goto out;
6521                         if (back->is_data) {
6522                                 dback = to_data_backref(back);
6523                                 fprintf(stderr, "Data backref %llu %s %llu"
6524                                         " owner %llu offset %llu num_refs %lu"
6525                                         " not found in extent tree\n",
6526                                         (unsigned long long)rec->start,
6527                                         back->full_backref ?
6528                                         "parent" : "root",
6529                                         back->full_backref ?
6530                                         (unsigned long long)dback->parent:
6531                                         (unsigned long long)dback->root,
6532                                         (unsigned long long)dback->owner,
6533                                         (unsigned long long)dback->offset,
6534                                         (unsigned long)dback->num_refs);
6535                         } else {
6536                                 tback = to_tree_backref(back);
6537                                 fprintf(stderr, "Tree backref %llu parent %llu"
6538                                         " root %llu not found in extent tree\n",
6539                                         (unsigned long long)rec->start,
6540                                         (unsigned long long)tback->parent,
6541                                         (unsigned long long)tback->root);
6542                         }
6543                 }
6544                 if (!back->is_data && !back->found_ref) {
6545                         err = 1;
6546                         if (!print_errs)
6547                                 goto out;
6548                         tback = to_tree_backref(back);
6549                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6550                                 (unsigned long long)rec->start,
6551                                 back->full_backref ? "parent" : "root",
6552                                 back->full_backref ?
6553                                 (unsigned long long)tback->parent :
6554                                 (unsigned long long)tback->root, back);
6555                 }
6556                 if (back->is_data) {
6557                         dback = to_data_backref(back);
6558                         if (dback->found_ref != dback->num_refs) {
6559                                 err = 1;
6560                                 if (!print_errs)
6561                                         goto out;
6562                                 fprintf(stderr, "Incorrect local backref count"
6563                                         " on %llu %s %llu owner %llu"
6564                                         " offset %llu found %u wanted %u back %p\n",
6565                                         (unsigned long long)rec->start,
6566                                         back->full_backref ?
6567                                         "parent" : "root",
6568                                         back->full_backref ?
6569                                         (unsigned long long)dback->parent:
6570                                         (unsigned long long)dback->root,
6571                                         (unsigned long long)dback->owner,
6572                                         (unsigned long long)dback->offset,
6573                                         dback->found_ref, dback->num_refs, back);
6574                         }
6575                         if (dback->disk_bytenr != rec->start) {
6576                                 err = 1;
6577                                 if (!print_errs)
6578                                         goto out;
6579                                 fprintf(stderr, "Backref disk bytenr does not"
6580                                         " match extent record, bytenr=%llu, "
6581                                         "ref bytenr=%llu\n",
6582                                         (unsigned long long)rec->start,
6583                                         (unsigned long long)dback->disk_bytenr);
6584                         }
6585
6586                         if (dback->bytes != rec->nr) {
6587                                 err = 1;
6588                                 if (!print_errs)
6589                                         goto out;
6590                                 fprintf(stderr, "Backref bytes do not match "
6591                                         "extent backref, bytenr=%llu, ref "
6592                                         "bytes=%llu, backref bytes=%llu\n",
6593                                         (unsigned long long)rec->start,
6594                                         (unsigned long long)rec->nr,
6595                                         (unsigned long long)dback->bytes);
6596                         }
6597                 }
6598                 if (!back->is_data) {
6599                         found += 1;
6600                 } else {
6601                         dback = to_data_backref(back);
6602                         found += dback->found_ref;
6603                 }
6604         }
6605         if (found != rec->refs) {
6606                 err = 1;
6607                 if (!print_errs)
6608                         goto out;
6609                 fprintf(stderr, "Incorrect global backref count "
6610                         "on %llu found %llu wanted %llu\n",
6611                         (unsigned long long)rec->start,
6612                         (unsigned long long)found,
6613                         (unsigned long long)rec->refs);
6614         }
6615 out:
6616         return err;
6617 }
6618
/* rb_free_nodes() callback: free the extent backref that embeds @node. */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6625
6626 static void free_all_extent_backrefs(struct extent_record *rec)
6627 {
6628         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6629 }
6630
6631 static void free_extent_record_cache(struct cache_tree *extent_cache)
6632 {
6633         struct cache_extent *cache;
6634         struct extent_record *rec;
6635
6636         while (1) {
6637                 cache = first_cache_extent(extent_cache);
6638                 if (!cache)
6639                         break;
6640                 rec = container_of(cache, struct extent_record, cache);
6641                 remove_cache_extent(extent_cache, cache);
6642                 free_all_extent_backrefs(rec);
6643                 free(rec);
6644         }
6645 }
6646
6647 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6648                                  struct extent_record *rec)
6649 {
6650         if (rec->content_checked && rec->owner_ref_checked &&
6651             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6652             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6653             !rec->bad_full_backref && !rec->crossing_stripes &&
6654             !rec->wrong_chunk_type) {
6655                 remove_cache_extent(extent_cache, &rec->cache);
6656                 free_all_extent_backrefs(rec);
6657                 list_del_init(&rec->list);
6658                 free(rec);
6659         }
6660         return 0;
6661 }
6662
6663 static int check_owner_ref(struct btrfs_root *root,
6664                             struct extent_record *rec,
6665                             struct extent_buffer *buf)
6666 {
6667         struct extent_backref *node, *tmp;
6668         struct tree_backref *back;
6669         struct btrfs_root *ref_root;
6670         struct btrfs_key key;
6671         struct btrfs_path path;
6672         struct extent_buffer *parent;
6673         int level;
6674         int found = 0;
6675         int ret;
6676
6677         rbtree_postorder_for_each_entry_safe(node, tmp,
6678                                              &rec->backref_tree, node) {
6679                 if (node->is_data)
6680                         continue;
6681                 if (!node->found_ref)
6682                         continue;
6683                 if (node->full_backref)
6684                         continue;
6685                 back = to_tree_backref(node);
6686                 if (btrfs_header_owner(buf) == back->root)
6687                         return 0;
6688         }
6689         BUG_ON(rec->is_root);
6690
6691         /* try to find the block by search corresponding fs tree */
6692         key.objectid = btrfs_header_owner(buf);
6693         key.type = BTRFS_ROOT_ITEM_KEY;
6694         key.offset = (u64)-1;
6695
6696         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6697         if (IS_ERR(ref_root))
6698                 return 1;
6699
6700         level = btrfs_header_level(buf);
6701         if (level == 0)
6702                 btrfs_item_key_to_cpu(buf, &key, 0);
6703         else
6704                 btrfs_node_key_to_cpu(buf, &key, 0);
6705
6706         btrfs_init_path(&path);
6707         path.lowest_level = level + 1;
6708         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6709         if (ret < 0)
6710                 return 0;
6711
6712         parent = path.nodes[level + 1];
6713         if (parent && buf->start == btrfs_node_blockptr(parent,
6714                                                         path.slots[level + 1]))
6715                 found = 1;
6716
6717         btrfs_release_path(&path);
6718         return found ? 0 : 1;
6719 }
6720
6721 static int is_extent_tree_record(struct extent_record *rec)
6722 {
6723         struct extent_backref *node, *tmp;
6724         struct tree_backref *back;
6725         int is_extent = 0;
6726
6727         rbtree_postorder_for_each_entry_safe(node, tmp,
6728                                              &rec->backref_tree, node) {
6729                 if (node->is_data)
6730                         return 0;
6731                 back = to_tree_backref(node);
6732                 if (node->full_backref)
6733                         return 0;
6734                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6735                         is_extent = 1;
6736         }
6737         return is_extent;
6738 }
6739
6740
6741 static int record_bad_block_io(struct btrfs_fs_info *info,
6742                                struct cache_tree *extent_cache,
6743                                u64 start, u64 len)
6744 {
6745         struct extent_record *rec;
6746         struct cache_extent *cache;
6747         struct btrfs_key key;
6748
6749         cache = lookup_cache_extent(extent_cache, start, len);
6750         if (!cache)
6751                 return 0;
6752
6753         rec = container_of(cache, struct extent_record, cache);
6754         if (!is_extent_tree_record(rec))
6755                 return 0;
6756
6757         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6758         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6759 }
6760
6761 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6762                        struct extent_buffer *buf, int slot)
6763 {
6764         if (btrfs_header_level(buf)) {
6765                 struct btrfs_key_ptr ptr1, ptr2;
6766
6767                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6768                                    sizeof(struct btrfs_key_ptr));
6769                 read_extent_buffer(buf, &ptr2,
6770                                    btrfs_node_key_ptr_offset(slot + 1),
6771                                    sizeof(struct btrfs_key_ptr));
6772                 write_extent_buffer(buf, &ptr1,
6773                                     btrfs_node_key_ptr_offset(slot + 1),
6774                                     sizeof(struct btrfs_key_ptr));
6775                 write_extent_buffer(buf, &ptr2,
6776                                     btrfs_node_key_ptr_offset(slot),
6777                                     sizeof(struct btrfs_key_ptr));
6778                 if (slot == 0) {
6779                         struct btrfs_disk_key key;
6780                         btrfs_node_key(buf, &key, 0);
6781                         btrfs_fixup_low_keys(root, path, &key,
6782                                              btrfs_header_level(buf) + 1);
6783                 }
6784         } else {
6785                 struct btrfs_item *item1, *item2;
6786                 struct btrfs_key k1, k2;
6787                 char *item1_data, *item2_data;
6788                 u32 item1_offset, item2_offset, item1_size, item2_size;
6789
6790                 item1 = btrfs_item_nr(slot);
6791                 item2 = btrfs_item_nr(slot + 1);
6792                 btrfs_item_key_to_cpu(buf, &k1, slot);
6793                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6794                 item1_offset = btrfs_item_offset(buf, item1);
6795                 item2_offset = btrfs_item_offset(buf, item2);
6796                 item1_size = btrfs_item_size(buf, item1);
6797                 item2_size = btrfs_item_size(buf, item2);
6798
6799                 item1_data = malloc(item1_size);
6800                 if (!item1_data)
6801                         return -ENOMEM;
6802                 item2_data = malloc(item2_size);
6803                 if (!item2_data) {
6804                         free(item1_data);
6805                         return -ENOMEM;
6806                 }
6807
6808                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6809                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6810
6811                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6812                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6813                 free(item1_data);
6814                 free(item2_data);
6815
6816                 btrfs_set_item_offset(buf, item1, item2_offset);
6817                 btrfs_set_item_offset(buf, item2, item1_offset);
6818                 btrfs_set_item_size(buf, item1, item2_size);
6819                 btrfs_set_item_size(buf, item2, item1_size);
6820
6821                 path->slots[0] = slot;
6822                 btrfs_set_item_key_unsafe(root, path, &k2);
6823                 path->slots[0] = slot + 1;
6824                 btrfs_set_item_key_unsafe(root, path, &k1);
6825         }
6826         return 0;
6827 }
6828
6829 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6830 {
6831         struct extent_buffer *buf;
6832         struct btrfs_key k1, k2;
6833         int i;
6834         int level = path->lowest_level;
6835         int ret = -EIO;
6836
6837         buf = path->nodes[level];
6838         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6839                 if (level) {
6840                         btrfs_node_key_to_cpu(buf, &k1, i);
6841                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6842                 } else {
6843                         btrfs_item_key_to_cpu(buf, &k1, i);
6844                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6845                 }
6846                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6847                         continue;
6848                 ret = swap_values(root, path, buf, i);
6849                 if (ret)
6850                         break;
6851                 btrfs_mark_buffer_dirty(buf);
6852                 i = 0;
6853         }
6854         return ret;
6855 }
6856
6857 static int delete_bogus_item(struct btrfs_root *root,
6858                              struct btrfs_path *path,
6859                              struct extent_buffer *buf, int slot)
6860 {
6861         struct btrfs_key key;
6862         int nritems = btrfs_header_nritems(buf);
6863
6864         btrfs_item_key_to_cpu(buf, &key, slot);
6865
6866         /* These are all the keys we can deal with missing. */
6867         if (key.type != BTRFS_DIR_INDEX_KEY &&
6868             key.type != BTRFS_EXTENT_ITEM_KEY &&
6869             key.type != BTRFS_METADATA_ITEM_KEY &&
6870             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6871             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6872                 return -1;
6873
6874         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6875                (unsigned long long)key.objectid, key.type,
6876                (unsigned long long)key.offset, slot, buf->start);
6877         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6878                               btrfs_item_nr_offset(slot + 1),
6879                               sizeof(struct btrfs_item) *
6880                               (nritems - slot - 1));
6881         btrfs_set_header_nritems(buf, nritems - 1);
6882         if (slot == 0) {
6883                 struct btrfs_disk_key disk_key;
6884
6885                 btrfs_item_key(buf, &disk_key, 0);
6886                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6887         }
6888         btrfs_mark_buffer_dirty(buf);
6889         return 0;
6890 }
6891
6892 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6893 {
6894         struct extent_buffer *buf;
6895         int i;
6896         int ret = 0;
6897
6898         /* We should only get this for leaves */
6899         BUG_ON(path->lowest_level);
6900         buf = path->nodes[0];
6901 again:
6902         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6903                 unsigned int shift = 0, offset;
6904
6905                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6906                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6907                         if (btrfs_item_end_nr(buf, i) >
6908                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6909                                 ret = delete_bogus_item(root, path, buf, i);
6910                                 if (!ret)
6911                                         goto again;
6912                                 fprintf(stderr, "item is off the end of the "
6913                                         "leaf, can't fix\n");
6914                                 ret = -EIO;
6915                                 break;
6916                         }
6917                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6918                                 btrfs_item_end_nr(buf, i);
6919                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6920                            btrfs_item_offset_nr(buf, i - 1)) {
6921                         if (btrfs_item_end_nr(buf, i) >
6922                             btrfs_item_offset_nr(buf, i - 1)) {
6923                                 ret = delete_bogus_item(root, path, buf, i);
6924                                 if (!ret)
6925                                         goto again;
6926                                 fprintf(stderr, "items overlap, can't fix\n");
6927                                 ret = -EIO;
6928                                 break;
6929                         }
6930                         shift = btrfs_item_offset_nr(buf, i - 1) -
6931                                 btrfs_item_end_nr(buf, i);
6932                 }
6933                 if (!shift)
6934                         continue;
6935
6936                 printf("Shifting item nr %d by %u bytes in block %llu\n",
6937                        i, shift, (unsigned long long)buf->start);
6938                 offset = btrfs_item_offset_nr(buf, i);
6939                 memmove_extent_buffer(buf,
6940                                       btrfs_leaf_data(buf) + offset + shift,
6941                                       btrfs_leaf_data(buf) + offset,
6942                                       btrfs_item_size_nr(buf, i));
6943                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
6944                                       offset + shift);
6945                 btrfs_mark_buffer_dirty(buf);
6946         }
6947
6948         /*
6949          * We may have moved things, in which case we want to exit so we don't
6950          * write those changes out.  Once we have proper abort functionality in
6951          * progs this can be changed to something nicer.
6952          */
6953         BUG_ON(ret);
6954         return ret;
6955 }
6956
6957 /*
6958  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
6959  * then just return -EIO.
6960  */
6961 static int try_to_fix_bad_block(struct btrfs_root *root,
6962                                 struct extent_buffer *buf,
6963                                 enum btrfs_tree_block_status status)
6964 {
6965         struct btrfs_trans_handle *trans;
6966         struct ulist *roots;
6967         struct ulist_node *node;
6968         struct btrfs_root *search_root;
6969         struct btrfs_path path;
6970         struct ulist_iterator iter;
6971         struct btrfs_key root_key, key;
6972         int ret;
6973
6974         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
6975             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
6976                 return -EIO;
6977
6978         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
6979         if (ret)
6980                 return -EIO;
6981
6982         btrfs_init_path(&path);
6983         ULIST_ITER_INIT(&iter);
6984         while ((node = ulist_next(roots, &iter))) {
6985                 root_key.objectid = node->val;
6986                 root_key.type = BTRFS_ROOT_ITEM_KEY;
6987                 root_key.offset = (u64)-1;
6988
6989                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
6990                 if (IS_ERR(root)) {
6991                         ret = -EIO;
6992                         break;
6993                 }
6994
6995
6996                 trans = btrfs_start_transaction(search_root, 0);
6997                 if (IS_ERR(trans)) {
6998                         ret = PTR_ERR(trans);
6999                         break;
7000                 }
7001
7002                 path.lowest_level = btrfs_header_level(buf);
7003                 path.skip_check_block = 1;
7004                 if (path.lowest_level)
7005                         btrfs_node_key_to_cpu(buf, &key, 0);
7006                 else
7007                         btrfs_item_key_to_cpu(buf, &key, 0);
7008                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7009                 if (ret) {
7010                         ret = -EIO;
7011                         btrfs_commit_transaction(trans, search_root);
7012                         break;
7013                 }
7014                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7015                         ret = fix_key_order(search_root, &path);
7016                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7017                         ret = fix_item_offset(search_root, &path);
7018                 if (ret) {
7019                         btrfs_commit_transaction(trans, search_root);
7020                         break;
7021                 }
7022                 btrfs_release_path(&path);
7023                 btrfs_commit_transaction(trans, search_root);
7024         }
7025         ulist_free(roots);
7026         btrfs_release_path(&path);
7027         return ret;
7028 }
7029
7030 static int check_block(struct btrfs_root *root,
7031                        struct cache_tree *extent_cache,
7032                        struct extent_buffer *buf, u64 flags)
7033 {
7034         struct extent_record *rec;
7035         struct cache_extent *cache;
7036         struct btrfs_key key;
7037         enum btrfs_tree_block_status status;
7038         int ret = 0;
7039         int level;
7040
7041         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7042         if (!cache)
7043                 return 1;
7044         rec = container_of(cache, struct extent_record, cache);
7045         rec->generation = btrfs_header_generation(buf);
7046
7047         level = btrfs_header_level(buf);
7048         if (btrfs_header_nritems(buf) > 0) {
7049
7050                 if (level == 0)
7051                         btrfs_item_key_to_cpu(buf, &key, 0);
7052                 else
7053                         btrfs_node_key_to_cpu(buf, &key, 0);
7054
7055                 rec->info_objectid = key.objectid;
7056         }
7057         rec->info_level = level;
7058
7059         if (btrfs_is_leaf(buf))
7060                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7061         else
7062                 status = btrfs_check_node(root, &rec->parent_key, buf);
7063
7064         if (status != BTRFS_TREE_BLOCK_CLEAN) {
7065                 if (repair)
7066                         status = try_to_fix_bad_block(root, buf, status);
7067                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7068                         ret = -EIO;
7069                         fprintf(stderr, "bad block %llu\n",
7070                                 (unsigned long long)buf->start);
7071                 } else {
7072                         /*
7073                          * Signal to callers we need to start the scan over
7074                          * again since we'll have cowed blocks.
7075                          */
7076                         ret = -EAGAIN;
7077                 }
7078         } else {
7079                 rec->content_checked = 1;
7080                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7081                         rec->owner_ref_checked = 1;
7082                 else {
7083                         ret = check_owner_ref(root, rec, buf);
7084                         if (!ret)
7085                                 rec->owner_ref_checked = 1;
7086                 }
7087         }
7088         if (!ret)
7089                 maybe_free_extent_rec(extent_cache, rec);
7090         return ret;
7091 }
7092
#if 0
/*
 * Compiled out.  Walks the rec->backrefs list for a tree backref that matches
 * either @parent (full backrefs, when @parent is non-zero) or @root (all
 * other backrefs).  Returns the match or NULL.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
						u64 parent, u64 root)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct tree_backref *back;

		if (node->is_data)
			continue;

		back = to_tree_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
		} else if (!node->full_backref && back->root == root) {
			return back;
		}
	}
	return NULL;
}
#endif
7122
7123 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7124                                                 u64 parent, u64 root)
7125 {
7126         struct tree_backref *ref = malloc(sizeof(*ref));
7127
7128         if (!ref)
7129                 return NULL;
7130         memset(&ref->node, 0, sizeof(ref->node));
7131         if (parent > 0) {
7132                 ref->parent = parent;
7133                 ref->node.full_backref = 1;
7134         } else {
7135                 ref->root = root;
7136                 ref->node.full_backref = 0;
7137         }
7138
7139         return ref;
7140 }
7141
#if 0
/*
 * Compiled out.  Walks the rec->backrefs list for a data backref.  With a
 * non-zero @parent only full backrefs with that parent match.  Otherwise a
 * non-full backref must match @root, @owner and @offset, and, when both
 * @found_ref and the node's found_ref are set, also @bytes and @disk_bytenr.
 * Returns the match or NULL.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
						u64 parent, u64 root,
						u64 owner, u64 offset,
						int found_ref,
						u64 disk_bytenr, u64 bytes)
{
	struct list_head *pos;

	for (pos = rec->backrefs.next; pos != &rec->backrefs;
	     pos = pos->next) {
		struct extent_backref *node = to_extent_backref(pos);
		struct data_backref *back;

		if (!node->is_data)
			continue;

		back = to_data_backref(node);
		if (parent > 0) {
			if (node->full_backref && parent == back->parent)
				return back;
			continue;
		}

		if (node->full_backref)
			continue;
		if (back->root != root || back->owner != owner ||
		    back->offset != offset)
			continue;
		if (found_ref && node->found_ref &&
		    (back->bytes != bytes ||
		     back->disk_bytenr != disk_bytenr))
			continue;
		return back;
	}
	return NULL;
}
#endif
7180
7181 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7182                                                 u64 parent, u64 root,
7183                                                 u64 owner, u64 offset,
7184                                                 u64 max_size)
7185 {
7186         struct data_backref *ref = malloc(sizeof(*ref));
7187
7188         if (!ref)
7189                 return NULL;
7190         memset(&ref->node, 0, sizeof(ref->node));
7191         ref->node.is_data = 1;
7192
7193         if (parent > 0) {
7194                 ref->parent = parent;
7195                 ref->owner = 0;
7196                 ref->offset = 0;
7197                 ref->node.full_backref = 1;
7198         } else {
7199                 ref->root = root;
7200                 ref->owner = owner;
7201                 ref->offset = offset;
7202                 ref->node.full_backref = 0;
7203         }
7204         ref->bytes = max_size;
7205         ref->found_ref = 0;
7206         ref->num_refs = 0;
7207         if (max_size > rec->max_size)
7208                 rec->max_size = max_size;
7209         return ref;
7210 }
7211
7212 /* Check if the type of extent matches with its chunk */
7213 static void check_extent_type(struct extent_record *rec)
7214 {
7215         struct btrfs_block_group_cache *bg_cache;
7216
7217         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7218         if (!bg_cache)
7219                 return;
7220
7221         /* data extent, check chunk directly*/
7222         if (!rec->metadata) {
7223                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7224                         rec->wrong_chunk_type = 1;
7225                 return;
7226         }
7227
7228         /* metadata extent, check the obvious case first */
7229         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7230                                  BTRFS_BLOCK_GROUP_METADATA))) {
7231                 rec->wrong_chunk_type = 1;
7232                 return;
7233         }
7234
7235         /*
7236          * Check SYSTEM extent, as it's also marked as metadata, we can only
7237          * make sure it's a SYSTEM extent by its backref
7238          */
7239         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7240                 struct extent_backref *node;
7241                 struct tree_backref *tback;
7242                 u64 bg_type;
7243
7244                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7245                 if (node->is_data) {
7246                         /* tree block shouldn't have data backref */
7247                         rec->wrong_chunk_type = 1;
7248                         return;
7249                 }
7250                 tback = container_of(node, struct tree_backref, node);
7251
7252                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7253                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7254                 else
7255                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7256                 if (!(bg_cache->flags & bg_type))
7257                         rec->wrong_chunk_type = 1;
7258         }
7259 }
7260
7261 /*
7262  * Allocate a new extent record, fill default values from @tmpl and insert int
7263  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7264  * the cache, otherwise it fails.
7265  */
7266 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7267                 struct extent_record *tmpl)
7268 {
7269         struct extent_record *rec;
7270         int ret = 0;
7271
7272         BUG_ON(tmpl->max_size == 0);
7273         rec = malloc(sizeof(*rec));
7274         if (!rec)
7275                 return -ENOMEM;
7276         rec->start = tmpl->start;
7277         rec->max_size = tmpl->max_size;
7278         rec->nr = max(tmpl->nr, tmpl->max_size);
7279         rec->found_rec = tmpl->found_rec;
7280         rec->content_checked = tmpl->content_checked;
7281         rec->owner_ref_checked = tmpl->owner_ref_checked;
7282         rec->num_duplicates = 0;
7283         rec->metadata = tmpl->metadata;
7284         rec->flag_block_full_backref = FLAG_UNSET;
7285         rec->bad_full_backref = 0;
7286         rec->crossing_stripes = 0;
7287         rec->wrong_chunk_type = 0;
7288         rec->is_root = tmpl->is_root;
7289         rec->refs = tmpl->refs;
7290         rec->extent_item_refs = tmpl->extent_item_refs;
7291         rec->parent_generation = tmpl->parent_generation;
7292         INIT_LIST_HEAD(&rec->backrefs);
7293         INIT_LIST_HEAD(&rec->dups);
7294         INIT_LIST_HEAD(&rec->list);
7295         rec->backref_tree = RB_ROOT;
7296         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7297         rec->cache.start = tmpl->start;
7298         rec->cache.size = tmpl->nr;
7299         ret = insert_cache_extent(extent_cache, &rec->cache);
7300         if (ret) {
7301                 free(rec);
7302                 return ret;
7303         }
7304         bytes_used += rec->nr;
7305
7306         if (tmpl->metadata)
7307                 rec->crossing_stripes = check_crossing_stripes(global_info,
7308                                 rec->start, global_info->nodesize);
7309         check_extent_type(rec);
7310         return ret;
7311 }
7312
7313 /*
7314  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7315  * some are hints:
7316  * - refs              - if found, increase refs
7317  * - is_root           - if found, set
7318  * - content_checked   - if found, set
7319  * - owner_ref_checked - if found, set
7320  *
7321  * If not found, create a new one, initialize and insert.
7322  */
7323 static int add_extent_rec(struct cache_tree *extent_cache,
7324                 struct extent_record *tmpl)
7325 {
7326         struct extent_record *rec;
7327         struct cache_extent *cache;
7328         int ret = 0;
7329         int dup = 0;
7330
7331         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7332         if (cache) {
7333                 rec = container_of(cache, struct extent_record, cache);
7334                 if (tmpl->refs)
7335                         rec->refs++;
7336                 if (rec->nr == 1)
7337                         rec->nr = max(tmpl->nr, tmpl->max_size);
7338
7339                 /*
7340                  * We need to make sure to reset nr to whatever the extent
7341                  * record says was the real size, this way we can compare it to
7342                  * the backrefs.
7343                  */
7344                 if (tmpl->found_rec) {
7345                         if (tmpl->start != rec->start || rec->found_rec) {
7346                                 struct extent_record *tmp;
7347
7348                                 dup = 1;
7349                                 if (list_empty(&rec->list))
7350                                         list_add_tail(&rec->list,
7351                                                       &duplicate_extents);
7352
7353                                 /*
7354                                  * We have to do this song and dance in case we
7355                                  * find an extent record that falls inside of
7356                                  * our current extent record but does not have
7357                                  * the same objectid.
7358                                  */
7359                                 tmp = malloc(sizeof(*tmp));
7360                                 if (!tmp)
7361                                         return -ENOMEM;
7362                                 tmp->start = tmpl->start;
7363                                 tmp->max_size = tmpl->max_size;
7364                                 tmp->nr = tmpl->nr;
7365                                 tmp->found_rec = 1;
7366                                 tmp->metadata = tmpl->metadata;
7367                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7368                                 INIT_LIST_HEAD(&tmp->list);
7369                                 list_add_tail(&tmp->list, &rec->dups);
7370                                 rec->num_duplicates++;
7371                         } else {
7372                                 rec->nr = tmpl->nr;
7373                                 rec->found_rec = 1;
7374                         }
7375                 }
7376
7377                 if (tmpl->extent_item_refs && !dup) {
7378                         if (rec->extent_item_refs) {
7379                                 fprintf(stderr, "block %llu rec "
7380                                         "extent_item_refs %llu, passed %llu\n",
7381                                         (unsigned long long)tmpl->start,
7382                                         (unsigned long long)
7383                                                         rec->extent_item_refs,
7384                                         (unsigned long long)tmpl->extent_item_refs);
7385                         }
7386                         rec->extent_item_refs = tmpl->extent_item_refs;
7387                 }
7388                 if (tmpl->is_root)
7389                         rec->is_root = 1;
7390                 if (tmpl->content_checked)
7391                         rec->content_checked = 1;
7392                 if (tmpl->owner_ref_checked)
7393                         rec->owner_ref_checked = 1;
7394                 memcpy(&rec->parent_key, &tmpl->parent_key,
7395                                 sizeof(tmpl->parent_key));
7396                 if (tmpl->parent_generation)
7397                         rec->parent_generation = tmpl->parent_generation;
7398                 if (rec->max_size < tmpl->max_size)
7399                         rec->max_size = tmpl->max_size;
7400
7401                 /*
7402                  * A metadata extent can't cross stripe_len boundary, otherwise
7403                  * kernel scrub won't be able to handle it.
7404                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7405                  * it.
7406                  */
7407                 if (tmpl->metadata)
7408                         rec->crossing_stripes = check_crossing_stripes(
7409                                         global_info, rec->start,
7410                                         global_info->nodesize);
7411                 check_extent_type(rec);
7412                 maybe_free_extent_rec(extent_cache, rec);
7413                 return ret;
7414         }
7415
7416         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7417
7418         return ret;
7419 }
7420
7421 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7422                             u64 parent, u64 root, int found_ref)
7423 {
7424         struct extent_record *rec;
7425         struct tree_backref *back;
7426         struct cache_extent *cache;
7427         int ret;
7428         bool insert = false;
7429
7430         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7431         if (!cache) {
7432                 struct extent_record tmpl;
7433
7434                 memset(&tmpl, 0, sizeof(tmpl));
7435                 tmpl.start = bytenr;
7436                 tmpl.nr = 1;
7437                 tmpl.metadata = 1;
7438                 tmpl.max_size = 1;
7439
7440                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7441                 if (ret)
7442                         return ret;
7443
7444                 /* really a bug in cache_extent implement now */
7445                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7446                 if (!cache)
7447                         return -ENOENT;
7448         }
7449
7450         rec = container_of(cache, struct extent_record, cache);
7451         if (rec->start != bytenr) {
7452                 /*
7453                  * Several cause, from unaligned bytenr to over lapping extents
7454                  */
7455                 return -EEXIST;
7456         }
7457
7458         back = find_tree_backref(rec, parent, root);
7459         if (!back) {
7460                 back = alloc_tree_backref(rec, parent, root);
7461                 if (!back)
7462                         return -ENOMEM;
7463                 insert = true;
7464         }
7465
7466         if (found_ref) {
7467                 if (back->node.found_ref) {
7468                         fprintf(stderr, "Extent back ref already exists "
7469                                 "for %llu parent %llu root %llu \n",
7470                                 (unsigned long long)bytenr,
7471                                 (unsigned long long)parent,
7472                                 (unsigned long long)root);
7473                 }
7474                 back->node.found_ref = 1;
7475         } else {
7476                 if (back->node.found_extent_tree) {
7477                         fprintf(stderr, "Extent back ref already exists "
7478                                 "for %llu parent %llu root %llu \n",
7479                                 (unsigned long long)bytenr,
7480                                 (unsigned long long)parent,
7481                                 (unsigned long long)root);
7482                 }
7483                 back->node.found_extent_tree = 1;
7484         }
7485         if (insert)
7486                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7487                         compare_extent_backref));
7488         check_extent_type(rec);
7489         maybe_free_extent_rec(extent_cache, rec);
7490         return 0;
7491 }
7492
7493 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7494                             u64 parent, u64 root, u64 owner, u64 offset,
7495                             u32 num_refs, int found_ref, u64 max_size)
7496 {
7497         struct extent_record *rec;
7498         struct data_backref *back;
7499         struct cache_extent *cache;
7500         int ret;
7501         bool insert = false;
7502
7503         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7504         if (!cache) {
7505                 struct extent_record tmpl;
7506
7507                 memset(&tmpl, 0, sizeof(tmpl));
7508                 tmpl.start = bytenr;
7509                 tmpl.nr = 1;
7510                 tmpl.max_size = max_size;
7511
7512                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7513                 if (ret)
7514                         return ret;
7515
7516                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7517                 if (!cache)
7518                         abort();
7519         }
7520
7521         rec = container_of(cache, struct extent_record, cache);
7522         if (rec->max_size < max_size)
7523                 rec->max_size = max_size;
7524
7525         /*
7526          * If found_ref is set then max_size is the real size and must match the
7527          * existing refs.  So if we have already found a ref then we need to
7528          * make sure that this ref matches the existing one, otherwise we need
7529          * to add a new backref so we can notice that the backrefs don't match
7530          * and we need to figure out who is telling the truth.  This is to
7531          * account for that awful fsync bug I introduced where we'd end up with
7532          * a btrfs_file_extent_item that would have its length include multiple
7533          * prealloc extents or point inside of a prealloc extent.
7534          */
7535         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7536                                  bytenr, max_size);
7537         if (!back) {
7538                 back = alloc_data_backref(rec, parent, root, owner, offset,
7539                                           max_size);
7540                 BUG_ON(!back);
7541                 insert = true;
7542         }
7543
7544         if (found_ref) {
7545                 BUG_ON(num_refs != 1);
7546                 if (back->node.found_ref)
7547                         BUG_ON(back->bytes != max_size);
7548                 back->node.found_ref = 1;
7549                 back->found_ref += 1;
7550                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7551                         back->bytes = max_size;
7552                         back->disk_bytenr = bytenr;
7553
7554                         /* Need to reinsert if not already in the tree */
7555                         if (!insert) {
7556                                 rb_erase(&back->node.node, &rec->backref_tree);
7557                                 insert = true;
7558                         }
7559                 }
7560                 rec->refs += 1;
7561                 rec->content_checked = 1;
7562                 rec->owner_ref_checked = 1;
7563         } else {
7564                 if (back->node.found_extent_tree) {
7565                         fprintf(stderr, "Extent back ref already exists "
7566                                 "for %llu parent %llu root %llu "
7567                                 "owner %llu offset %llu num_refs %lu\n",
7568                                 (unsigned long long)bytenr,
7569                                 (unsigned long long)parent,
7570                                 (unsigned long long)root,
7571                                 (unsigned long long)owner,
7572                                 (unsigned long long)offset,
7573                                 (unsigned long)num_refs);
7574                 }
7575                 back->num_refs = num_refs;
7576                 back->node.found_extent_tree = 1;
7577         }
7578         if (insert)
7579                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7580                         compare_extent_backref));
7581
7582         maybe_free_extent_rec(extent_cache, rec);
7583         return 0;
7584 }
7585
7586 static int add_pending(struct cache_tree *pending,
7587                        struct cache_tree *seen, u64 bytenr, u32 size)
7588 {
7589         int ret;
7590         ret = add_cache_extent(seen, bytenr, size);
7591         if (ret)
7592                 return ret;
7593         add_cache_extent(pending, bytenr, size);
7594         return 0;
7595 }
7596
7597 static int pick_next_pending(struct cache_tree *pending,
7598                         struct cache_tree *reada,
7599                         struct cache_tree *nodes,
7600                         u64 last, struct block_info *bits, int bits_nr,
7601                         int *reada_bits)
7602 {
7603         unsigned long node_start = last;
7604         struct cache_extent *cache;
7605         int ret;
7606
7607         cache = search_cache_extent(reada, 0);
7608         if (cache) {
7609                 bits[0].start = cache->start;
7610                 bits[0].size = cache->size;
7611                 *reada_bits = 1;
7612                 return 1;
7613         }
7614         *reada_bits = 0;
7615         if (node_start > 32768)
7616                 node_start -= 32768;
7617
7618         cache = search_cache_extent(nodes, node_start);
7619         if (!cache)
7620                 cache = search_cache_extent(nodes, 0);
7621
7622         if (!cache) {
7623                  cache = search_cache_extent(pending, 0);
7624                  if (!cache)
7625                          return 0;
7626                  ret = 0;
7627                  do {
7628                          bits[ret].start = cache->start;
7629                          bits[ret].size = cache->size;
7630                          cache = next_cache_extent(cache);
7631                          ret++;
7632                  } while (cache && ret < bits_nr);
7633                  return ret;
7634         }
7635
7636         ret = 0;
7637         do {
7638                 bits[ret].start = cache->start;
7639                 bits[ret].size = cache->size;
7640                 cache = next_cache_extent(cache);
7641                 ret++;
7642         } while (cache && ret < bits_nr);
7643
7644         if (bits_nr - ret > 8) {
7645                 u64 lookup = bits[0].start + bits[0].size;
7646                 struct cache_extent *next;
7647                 next = search_cache_extent(pending, lookup);
7648                 while(next) {
7649                         if (next->start - lookup > 32768)
7650                                 break;
7651                         bits[ret].start = next->start;
7652                         bits[ret].size = next->size;
7653                         lookup = next->start + next->size;
7654                         ret++;
7655                         if (ret == bits_nr)
7656                                 break;
7657                         next = next_cache_extent(next);
7658                         if (!next)
7659                                 break;
7660                 }
7661         }
7662         return ret;
7663 }
7664
7665 static void free_chunk_record(struct cache_extent *cache)
7666 {
7667         struct chunk_record *rec;
7668
7669         rec = container_of(cache, struct chunk_record, cache);
7670         list_del_init(&rec->list);
7671         list_del_init(&rec->dextents);
7672         free(rec);
7673 }
7674
7675 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7676 {
7677         cache_tree_free_extents(chunk_cache, free_chunk_record);
7678 }
7679
7680 static void free_device_record(struct rb_node *node)
7681 {
7682         struct device_record *rec;
7683
7684         rec = container_of(node, struct device_record, node);
7685         free(rec);
7686 }
7687
7688 FREE_RB_BASED_TREE(device_cache, free_device_record);
7689
7690 int insert_block_group_record(struct block_group_tree *tree,
7691                               struct block_group_record *bg_rec)
7692 {
7693         int ret;
7694
7695         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7696         if (ret)
7697                 return ret;
7698
7699         list_add_tail(&bg_rec->list, &tree->block_groups);
7700         return 0;
7701 }
7702
7703 static void free_block_group_record(struct cache_extent *cache)
7704 {
7705         struct block_group_record *rec;
7706
7707         rec = container_of(cache, struct block_group_record, cache);
7708         list_del_init(&rec->list);
7709         free(rec);
7710 }
7711
7712 void free_block_group_tree(struct block_group_tree *tree)
7713 {
7714         cache_tree_free_extents(&tree->tree, free_block_group_record);
7715 }
7716
7717 int insert_device_extent_record(struct device_extent_tree *tree,
7718                                 struct device_extent_record *de_rec)
7719 {
7720         int ret;
7721
7722         /*
7723          * Device extent is a bit different from the other extents, because
7724          * the extents which belong to the different devices may have the
7725          * same start and size, so we need use the special extent cache
7726          * search/insert functions.
7727          */
7728         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7729         if (ret)
7730                 return ret;
7731
7732         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7733         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7734         return 0;
7735 }
7736
7737 static void free_device_extent_record(struct cache_extent *cache)
7738 {
7739         struct device_extent_record *rec;
7740
7741         rec = container_of(cache, struct device_extent_record, cache);
7742         if (!list_empty(&rec->chunk_list))
7743                 list_del_init(&rec->chunk_list);
7744         if (!list_empty(&rec->device_list))
7745                 list_del_init(&rec->device_list);
7746         free(rec);
7747 }
7748
7749 void free_device_extent_tree(struct device_extent_tree *tree)
7750 {
7751         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7752 }
7753
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by the v0 extent ref item at @slot of @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is recorded as a
 * tree backref; anything else is recorded as a data backref carrying the
 * item's ref count.
 *
 * Returns the result of add_tree_backref() / add_data_backref().
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid,
                                        key.offset, 0, 0, 0,
                                        btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
7774
7775 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7776                                             struct btrfs_key *key,
7777                                             int slot)
7778 {
7779         struct btrfs_chunk *ptr;
7780         struct chunk_record *rec;
7781         int num_stripes, i;
7782
7783         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7784         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7785
7786         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7787         if (!rec) {
7788                 fprintf(stderr, "memory allocation failed\n");
7789                 exit(-1);
7790         }
7791
7792         INIT_LIST_HEAD(&rec->list);
7793         INIT_LIST_HEAD(&rec->dextents);
7794         rec->bg_rec = NULL;
7795
7796         rec->cache.start = key->offset;
7797         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7798
7799         rec->generation = btrfs_header_generation(leaf);
7800
7801         rec->objectid = key->objectid;
7802         rec->type = key->type;
7803         rec->offset = key->offset;
7804
7805         rec->length = rec->cache.size;
7806         rec->owner = btrfs_chunk_owner(leaf, ptr);
7807         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7808         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7809         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7810         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7811         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7812         rec->num_stripes = num_stripes;
7813         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7814
7815         for (i = 0; i < rec->num_stripes; ++i) {
7816                 rec->stripes[i].devid =
7817                         btrfs_stripe_devid_nr(leaf, ptr, i);
7818                 rec->stripes[i].offset =
7819                         btrfs_stripe_offset_nr(leaf, ptr, i);
7820                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7821                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7822                                 BTRFS_UUID_SIZE);
7823         }
7824
7825         return rec;
7826 }
7827
7828 static int process_chunk_item(struct cache_tree *chunk_cache,
7829                               struct btrfs_key *key, struct extent_buffer *eb,
7830                               int slot)
7831 {
7832         struct chunk_record *rec;
7833         struct btrfs_chunk *chunk;
7834         int ret = 0;
7835
7836         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7837         /*
7838          * Do extra check for this chunk item,
7839          *
7840          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7841          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7842          * and owner<->key_type check.
7843          */
7844         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7845                                       key->offset);
7846         if (ret < 0) {
7847                 error("chunk(%llu, %llu) is not valid, ignore it",
7848                       key->offset, btrfs_chunk_length(eb, chunk));
7849                 return 0;
7850         }
7851         rec = btrfs_new_chunk_record(eb, key, slot);
7852         ret = insert_cache_extent(chunk_cache, &rec->cache);
7853         if (ret) {
7854                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7855                         rec->offset, rec->length);
7856                 free(rec);
7857         }
7858
7859         return ret;
7860 }
7861
7862 static int process_device_item(struct rb_root *dev_cache,
7863                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7864 {
7865         struct btrfs_dev_item *ptr;
7866         struct device_record *rec;
7867         int ret = 0;
7868
7869         ptr = btrfs_item_ptr(eb,
7870                 slot, struct btrfs_dev_item);
7871
7872         rec = malloc(sizeof(*rec));
7873         if (!rec) {
7874                 fprintf(stderr, "memory allocation failed\n");
7875                 return -ENOMEM;
7876         }
7877
7878         rec->devid = key->offset;
7879         rec->generation = btrfs_header_generation(eb);
7880
7881         rec->objectid = key->objectid;
7882         rec->type = key->type;
7883         rec->offset = key->offset;
7884
7885         rec->devid = btrfs_device_id(eb, ptr);
7886         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7887         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7888
7889         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7890         if (ret) {
7891                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7892                 free(rec);
7893         }
7894
7895         return ret;
7896 }
7897
7898 struct block_group_record *
7899 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7900                              int slot)
7901 {
7902         struct btrfs_block_group_item *ptr;
7903         struct block_group_record *rec;
7904
7905         rec = calloc(1, sizeof(*rec));
7906         if (!rec) {
7907                 fprintf(stderr, "memory allocation failed\n");
7908                 exit(-1);
7909         }
7910
7911         rec->cache.start = key->objectid;
7912         rec->cache.size = key->offset;
7913
7914         rec->generation = btrfs_header_generation(leaf);
7915
7916         rec->objectid = key->objectid;
7917         rec->type = key->type;
7918         rec->offset = key->offset;
7919
7920         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7921         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7922
7923         INIT_LIST_HEAD(&rec->list);
7924
7925         return rec;
7926 }
7927
7928 static int process_block_group_item(struct block_group_tree *block_group_cache,
7929                                     struct btrfs_key *key,
7930                                     struct extent_buffer *eb, int slot)
7931 {
7932         struct block_group_record *rec;
7933         int ret = 0;
7934
7935         rec = btrfs_new_block_group_record(eb, key, slot);
7936         ret = insert_block_group_record(block_group_cache, rec);
7937         if (ret) {
7938                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
7939                         rec->objectid, rec->offset);
7940                 free(rec);
7941         }
7942
7943         return ret;
7944 }
7945
7946 struct device_extent_record *
7947 btrfs_new_device_extent_record(struct extent_buffer *leaf,
7948                                struct btrfs_key *key, int slot)
7949 {
7950         struct device_extent_record *rec;
7951         struct btrfs_dev_extent *ptr;
7952
7953         rec = calloc(1, sizeof(*rec));
7954         if (!rec) {
7955                 fprintf(stderr, "memory allocation failed\n");
7956                 exit(-1);
7957         }
7958
7959         rec->cache.objectid = key->objectid;
7960         rec->cache.start = key->offset;
7961
7962         rec->generation = btrfs_header_generation(leaf);
7963
7964         rec->objectid = key->objectid;
7965         rec->type = key->type;
7966         rec->offset = key->offset;
7967
7968         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7969         rec->chunk_objecteid =
7970                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
7971         rec->chunk_offset =
7972                 btrfs_dev_extent_chunk_offset(leaf, ptr);
7973         rec->length = btrfs_dev_extent_length(leaf, ptr);
7974         rec->cache.size = rec->length;
7975
7976         INIT_LIST_HEAD(&rec->chunk_list);
7977         INIT_LIST_HEAD(&rec->device_list);
7978
7979         return rec;
7980 }
7981
7982 static int
7983 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
7984                            struct btrfs_key *key, struct extent_buffer *eb,
7985                            int slot)
7986 {
7987         struct device_extent_record *rec;
7988         int ret;
7989
7990         rec = btrfs_new_device_extent_record(eb, key, slot);
7991         ret = insert_device_extent_record(dev_extent_cache, rec);
7992         if (ret) {
7993                 fprintf(stderr,
7994                         "Device extent[%llu, %llu, %llu] existed.\n",
7995                         rec->objectid, rec->offset, rec->length);
7996                 free(rec);
7997         }
7998
7999         return ret;
8000 }
8001
8002 static int process_extent_item(struct btrfs_root *root,
8003                                struct cache_tree *extent_cache,
8004                                struct extent_buffer *eb, int slot)
8005 {
8006         struct btrfs_extent_item *ei;
8007         struct btrfs_extent_inline_ref *iref;
8008         struct btrfs_extent_data_ref *dref;
8009         struct btrfs_shared_data_ref *sref;
8010         struct btrfs_key key;
8011         struct extent_record tmpl;
8012         unsigned long end;
8013         unsigned long ptr;
8014         int ret;
8015         int type;
8016         u32 item_size = btrfs_item_size_nr(eb, slot);
8017         u64 refs = 0;
8018         u64 offset;
8019         u64 num_bytes;
8020         int metadata = 0;
8021
8022         btrfs_item_key_to_cpu(eb, &key, slot);
8023
8024         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8025                 metadata = 1;
8026                 num_bytes = root->fs_info->nodesize;
8027         } else {
8028                 num_bytes = key.offset;
8029         }
8030
8031         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8032                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8033                       key.objectid, root->fs_info->sectorsize);
8034                 return -EIO;
8035         }
8036         if (item_size < sizeof(*ei)) {
8037 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8038                 struct btrfs_extent_item_v0 *ei0;
8039                 if (item_size != sizeof(*ei0)) {
8040                         error(
8041         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
8042                                 key.objectid, key.type, key.offset,
8043                                 btrfs_header_bytenr(eb), slot);
8044                         BUG();
8045                 }
8046                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8047                 refs = btrfs_extent_refs_v0(eb, ei0);
8048 #else
8049                 BUG();
8050 #endif
8051                 memset(&tmpl, 0, sizeof(tmpl));
8052                 tmpl.start = key.objectid;
8053                 tmpl.nr = num_bytes;
8054                 tmpl.extent_item_refs = refs;
8055                 tmpl.metadata = metadata;
8056                 tmpl.found_rec = 1;
8057                 tmpl.max_size = num_bytes;
8058
8059                 return add_extent_rec(extent_cache, &tmpl);
8060         }
8061
8062         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8063         refs = btrfs_extent_refs(eb, ei);
8064         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8065                 metadata = 1;
8066         else
8067                 metadata = 0;
8068         if (metadata && num_bytes != root->fs_info->nodesize) {
8069                 error("ignore invalid metadata extent, length %llu does not equal to %u",
8070                       num_bytes, root->fs_info->nodesize);
8071                 return -EIO;
8072         }
8073         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8074                 error("ignore invalid data extent, length %llu is not aligned to %u",
8075                       num_bytes, root->fs_info->sectorsize);
8076                 return -EIO;
8077         }
8078
8079         memset(&tmpl, 0, sizeof(tmpl));
8080         tmpl.start = key.objectid;
8081         tmpl.nr = num_bytes;
8082         tmpl.extent_item_refs = refs;
8083         tmpl.metadata = metadata;
8084         tmpl.found_rec = 1;
8085         tmpl.max_size = num_bytes;
8086         add_extent_rec(extent_cache, &tmpl);
8087
8088         ptr = (unsigned long)(ei + 1);
8089         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8090             key.type == BTRFS_EXTENT_ITEM_KEY)
8091                 ptr += sizeof(struct btrfs_tree_block_info);
8092
8093         end = (unsigned long)ei + item_size;
8094         while (ptr < end) {
8095                 iref = (struct btrfs_extent_inline_ref *)ptr;
8096                 type = btrfs_extent_inline_ref_type(eb, iref);
8097                 offset = btrfs_extent_inline_ref_offset(eb, iref);
8098                 switch (type) {
8099                 case BTRFS_TREE_BLOCK_REF_KEY:
8100                         ret = add_tree_backref(extent_cache, key.objectid,
8101                                         0, offset, 0);
8102                         if (ret < 0)
8103                                 error(
8104                         "add_tree_backref failed (extent items tree block): %s",
8105                                       strerror(-ret));
8106                         break;
8107                 case BTRFS_SHARED_BLOCK_REF_KEY:
8108                         ret = add_tree_backref(extent_cache, key.objectid,
8109                                         offset, 0, 0);
8110                         if (ret < 0)
8111                                 error(
8112                         "add_tree_backref failed (extent items shared block): %s",
8113                                       strerror(-ret));
8114                         break;
8115                 case BTRFS_EXTENT_DATA_REF_KEY:
8116                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8117                         add_data_backref(extent_cache, key.objectid, 0,
8118                                         btrfs_extent_data_ref_root(eb, dref),
8119                                         btrfs_extent_data_ref_objectid(eb,
8120                                                                        dref),
8121                                         btrfs_extent_data_ref_offset(eb, dref),
8122                                         btrfs_extent_data_ref_count(eb, dref),
8123                                         0, num_bytes);
8124                         break;
8125                 case BTRFS_SHARED_DATA_REF_KEY:
8126                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8127                         add_data_backref(extent_cache, key.objectid, offset,
8128                                         0, 0, 0,
8129                                         btrfs_shared_data_ref_count(eb, sref),
8130                                         0, num_bytes);
8131                         break;
8132                 default:
8133                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8134                                 key.objectid, key.type, num_bytes);
8135                         goto out;
8136                 }
8137                 ptr += btrfs_extent_inline_ref_size(type);
8138         }
8139         WARN_ON(ptr > end);
8140 out:
8141         return 0;
8142 }
8143
8144 static int check_cache_range(struct btrfs_root *root,
8145                              struct btrfs_block_group_cache *cache,
8146                              u64 offset, u64 bytes)
8147 {
8148         struct btrfs_free_space *entry;
8149         u64 *logical;
8150         u64 bytenr;
8151         int stripe_len;
8152         int i, nr, ret;
8153
8154         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8155                 bytenr = btrfs_sb_offset(i);
8156                 ret = btrfs_rmap_block(root->fs_info,
8157                                        cache->key.objectid, bytenr, 0,
8158                                        &logical, &nr, &stripe_len);
8159                 if (ret)
8160                         return ret;
8161
8162                 while (nr--) {
8163                         if (logical[nr] + stripe_len <= offset)
8164                                 continue;
8165                         if (offset + bytes <= logical[nr])
8166                                 continue;
8167                         if (logical[nr] == offset) {
8168                                 if (stripe_len >= bytes) {
8169                                         free(logical);
8170                                         return 0;
8171                                 }
8172                                 bytes -= stripe_len;
8173                                 offset += stripe_len;
8174                         } else if (logical[nr] < offset) {
8175                                 if (logical[nr] + stripe_len >=
8176                                     offset + bytes) {
8177                                         free(logical);
8178                                         return 0;
8179                                 }
8180                                 bytes = (offset + bytes) -
8181                                         (logical[nr] + stripe_len);
8182                                 offset = logical[nr] + stripe_len;
8183                         } else {
8184                                 /*
8185                                  * Could be tricky, the super may land in the
8186                                  * middle of the area we're checking.  First
8187                                  * check the easiest case, it's at the end.
8188                                  */
8189                                 if (logical[nr] + stripe_len >=
8190                                     bytes + offset) {
8191                                         bytes = logical[nr] - offset;
8192                                         continue;
8193                                 }
8194
8195                                 /* Check the left side */
8196                                 ret = check_cache_range(root, cache,
8197                                                         offset,
8198                                                         logical[nr] - offset);
8199                                 if (ret) {
8200                                         free(logical);
8201                                         return ret;
8202                                 }
8203
8204                                 /* Now we continue with the right side */
8205                                 bytes = (offset + bytes) -
8206                                         (logical[nr] + stripe_len);
8207                                 offset = logical[nr] + stripe_len;
8208                         }
8209                 }
8210
8211                 free(logical);
8212         }
8213
8214         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8215         if (!entry) {
8216                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8217                         offset, offset+bytes);
8218                 return -EINVAL;
8219         }
8220
8221         if (entry->offset != offset) {
8222                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8223                         entry->offset);
8224                 return -EINVAL;
8225         }
8226
8227         if (entry->bytes != bytes) {
8228                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8229                         bytes, entry->bytes, offset);
8230                 return -EINVAL;
8231         }
8232
8233         unlink_free_space(cache->free_space_ctl, entry);
8234         free(entry);
8235         return 0;
8236 }
8237
8238 static int verify_space_cache(struct btrfs_root *root,
8239                               struct btrfs_block_group_cache *cache)
8240 {
8241         struct btrfs_path path;
8242         struct extent_buffer *leaf;
8243         struct btrfs_key key;
8244         u64 last;
8245         int ret = 0;
8246
8247         root = root->fs_info->extent_root;
8248
8249         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8250
8251         btrfs_init_path(&path);
8252         key.objectid = last;
8253         key.offset = 0;
8254         key.type = BTRFS_EXTENT_ITEM_KEY;
8255         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8256         if (ret < 0)
8257                 goto out;
8258         ret = 0;
8259         while (1) {
8260                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8261                         ret = btrfs_next_leaf(root, &path);
8262                         if (ret < 0)
8263                                 goto out;
8264                         if (ret > 0) {
8265                                 ret = 0;
8266                                 break;
8267                         }
8268                 }
8269                 leaf = path.nodes[0];
8270                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8271                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8272                         break;
8273                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8274                     key.type != BTRFS_METADATA_ITEM_KEY) {
8275                         path.slots[0]++;
8276                         continue;
8277                 }
8278
8279                 if (last == key.objectid) {
8280                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8281                                 last = key.objectid + key.offset;
8282                         else
8283                                 last = key.objectid + root->fs_info->nodesize;
8284                         path.slots[0]++;
8285                         continue;
8286                 }
8287
8288                 ret = check_cache_range(root, cache, last,
8289                                         key.objectid - last);
8290                 if (ret)
8291                         break;
8292                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8293                         last = key.objectid + key.offset;
8294                 else
8295                         last = key.objectid + root->fs_info->nodesize;
8296                 path.slots[0]++;
8297         }
8298
8299         if (last < cache->key.objectid + cache->key.offset)
8300                 ret = check_cache_range(root, cache, last,
8301                                         cache->key.objectid +
8302                                         cache->key.offset - last);
8303
8304 out:
8305         btrfs_release_path(&path);
8306
8307         if (!ret &&
8308             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8309                 fprintf(stderr, "There are still entries left in the space "
8310                         "cache\n");
8311                 ret = -EINVAL;
8312         }
8313
8314         return ret;
8315 }
8316
8317 static int check_space_cache(struct btrfs_root *root)
8318 {
8319         struct btrfs_block_group_cache *cache;
8320         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8321         int ret;
8322         int error = 0;
8323
8324         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8325             btrfs_super_generation(root->fs_info->super_copy) !=
8326             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8327                 printf("cache and super generation don't match, space cache "
8328                        "will be invalidated\n");
8329                 return 0;
8330         }
8331
8332         if (ctx.progress_enabled) {
8333                 ctx.tp = TASK_FREE_SPACE;
8334                 task_start(ctx.info);
8335         }
8336
8337         while (1) {
8338                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8339                 if (!cache)
8340                         break;
8341
8342                 start = cache->key.objectid + cache->key.offset;
8343                 if (!cache->free_space_ctl) {
8344                         if (btrfs_init_free_space_ctl(cache,
8345                                                 root->fs_info->sectorsize)) {
8346                                 ret = -ENOMEM;
8347                                 break;
8348                         }
8349                 } else {
8350                         btrfs_remove_free_space_cache(cache);
8351                 }
8352
8353                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8354                         ret = exclude_super_stripes(root, cache);
8355                         if (ret) {
8356                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8357                                         strerror(-ret));
8358                                 error++;
8359                                 continue;
8360                         }
8361                         ret = load_free_space_tree(root->fs_info, cache);
8362                         free_excluded_extents(root, cache);
8363                         if (ret < 0) {
8364                                 fprintf(stderr, "could not load free space tree: %s\n",
8365                                         strerror(-ret));
8366                                 error++;
8367                                 continue;
8368                         }
8369                         error += ret;
8370                 } else {
8371                         ret = load_free_space_cache(root->fs_info, cache);
8372                         if (!ret)
8373                                 continue;
8374                 }
8375
8376                 ret = verify_space_cache(root, cache);
8377                 if (ret) {
8378                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8379                                 cache->key.objectid);
8380                         error++;
8381                 }
8382         }
8383
8384         task_stop(ctx.info);
8385
8386         return error ? -EINVAL : 0;
8387 }
8388
8389 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8390                         u64 num_bytes, unsigned long leaf_offset,
8391                         struct extent_buffer *eb) {
8392
8393         struct btrfs_fs_info *fs_info = root->fs_info;
8394         u64 offset = 0;
8395         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8396         char *data;
8397         unsigned long csum_offset;
8398         u32 csum;
8399         u32 csum_expected;
8400         u64 read_len;
8401         u64 data_checked = 0;
8402         u64 tmp;
8403         int ret = 0;
8404         int mirror;
8405         int num_copies;
8406
8407         if (num_bytes % fs_info->sectorsize)
8408                 return -EINVAL;
8409
8410         data = malloc(num_bytes);
8411         if (!data)
8412                 return -ENOMEM;
8413
8414         while (offset < num_bytes) {
8415                 mirror = 0;
8416 again:
8417                 read_len = num_bytes - offset;
8418                 /* read as much space once a time */
8419                 ret = read_extent_data(fs_info, data + offset,
8420                                 bytenr + offset, &read_len, mirror);
8421                 if (ret)
8422                         goto out;
8423                 data_checked = 0;
8424                 /* verify every 4k data's checksum */
8425                 while (data_checked < read_len) {
8426                         csum = ~(u32)0;
8427                         tmp = offset + data_checked;
8428
8429                         csum = btrfs_csum_data((char *)data + tmp,
8430                                                csum, fs_info->sectorsize);
8431                         btrfs_csum_final(csum, (u8 *)&csum);
8432
8433                         csum_offset = leaf_offset +
8434                                  tmp / fs_info->sectorsize * csum_size;
8435                         read_extent_buffer(eb, (char *)&csum_expected,
8436                                            csum_offset, csum_size);
8437                         /* try another mirror */
8438                         if (csum != csum_expected) {
8439                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8440                                                 mirror, bytenr + tmp,
8441                                                 csum, csum_expected);
8442                                 num_copies = btrfs_num_copies(root->fs_info,
8443                                                 bytenr, num_bytes);
8444                                 if (mirror < num_copies - 1) {
8445                                         mirror += 1;
8446                                         goto again;
8447                                 }
8448                         }
8449                         data_checked += fs_info->sectorsize;
8450                 }
8451                 offset += read_len;
8452         }
8453 out:
8454         free(data);
8455         return ret;
8456 }
8457
8458 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8459                                u64 num_bytes)
8460 {
8461         struct btrfs_path path;
8462         struct extent_buffer *leaf;
8463         struct btrfs_key key;
8464         int ret;
8465
8466         btrfs_init_path(&path);
8467         key.objectid = bytenr;
8468         key.type = BTRFS_EXTENT_ITEM_KEY;
8469         key.offset = (u64)-1;
8470
8471 again:
8472         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8473                                 0, 0);
8474         if (ret < 0) {
8475                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8476                 btrfs_release_path(&path);
8477                 return ret;
8478         } else if (ret) {
8479                 if (path.slots[0] > 0) {
8480                         path.slots[0]--;
8481                 } else {
8482                         ret = btrfs_prev_leaf(root, &path);
8483                         if (ret < 0) {
8484                                 goto out;
8485                         } else if (ret > 0) {
8486                                 ret = 0;
8487                                 goto out;
8488                         }
8489                 }
8490         }
8491
8492         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8493
8494         /*
8495          * Block group items come before extent items if they have the same
8496          * bytenr, so walk back one more just in case.  Dear future traveller,
8497          * first congrats on mastering time travel.  Now if it's not too much
8498          * trouble could you go back to 2006 and tell Chris to make the
8499          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8500          * EXTENT_ITEM_KEY please?
8501          */
8502         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8503                 if (path.slots[0] > 0) {
8504                         path.slots[0]--;
8505                 } else {
8506                         ret = btrfs_prev_leaf(root, &path);
8507                         if (ret < 0) {
8508                                 goto out;
8509                         } else if (ret > 0) {
8510                                 ret = 0;
8511                                 goto out;
8512                         }
8513                 }
8514                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8515         }
8516
8517         while (num_bytes) {
8518                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8519                         ret = btrfs_next_leaf(root, &path);
8520                         if (ret < 0) {
8521                                 fprintf(stderr, "Error going to next leaf "
8522                                         "%d\n", ret);
8523                                 btrfs_release_path(&path);
8524                                 return ret;
8525                         } else if (ret) {
8526                                 break;
8527                         }
8528                 }
8529                 leaf = path.nodes[0];
8530                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8531                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8532                         path.slots[0]++;
8533                         continue;
8534                 }
8535                 if (key.objectid + key.offset < bytenr) {
8536                         path.slots[0]++;
8537                         continue;
8538                 }
8539                 if (key.objectid > bytenr + num_bytes)
8540                         break;
8541
8542                 if (key.objectid == bytenr) {
8543                         if (key.offset >= num_bytes) {
8544                                 num_bytes = 0;
8545                                 break;
8546                         }
8547                         num_bytes -= key.offset;
8548                         bytenr += key.offset;
8549                 } else if (key.objectid < bytenr) {
8550                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8551                                 num_bytes = 0;
8552                                 break;
8553                         }
8554                         num_bytes = (bytenr + num_bytes) -
8555                                 (key.objectid + key.offset);
8556                         bytenr = key.objectid + key.offset;
8557                 } else {
8558                         if (key.objectid + key.offset < bytenr + num_bytes) {
8559                                 u64 new_start = key.objectid + key.offset;
8560                                 u64 new_bytes = bytenr + num_bytes - new_start;
8561
8562                                 /*
8563                                  * Weird case, the extent is in the middle of
8564                                  * our range, we'll have to search one side
8565                                  * and then the other.  Not sure if this happens
8566                                  * in real life, but no harm in coding it up
8567                                  * anyway just in case.
8568                                  */
8569                                 btrfs_release_path(&path);
8570                                 ret = check_extent_exists(root, new_start,
8571                                                           new_bytes);
8572                                 if (ret) {
8573                                         fprintf(stderr, "Right section didn't "
8574                                                 "have a record\n");
8575                                         break;
8576                                 }
8577                                 num_bytes = key.objectid - bytenr;
8578                                 goto again;
8579                         }
8580                         num_bytes = key.objectid - bytenr;
8581                 }
8582                 path.slots[0]++;
8583         }
8584         ret = 0;
8585
8586 out:
8587         if (num_bytes && !ret) {
8588                 fprintf(stderr, "There are no extents for csum range "
8589                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8590                 ret = 1;
8591         }
8592
8593         btrfs_release_path(&path);
8594         return ret;
8595 }
8596
8597 static int check_csums(struct btrfs_root *root)
8598 {
8599         struct btrfs_path path;
8600         struct extent_buffer *leaf;
8601         struct btrfs_key key;
8602         u64 offset = 0, num_bytes = 0;
8603         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8604         int errors = 0;
8605         int ret;
8606         u64 data_len;
8607         unsigned long leaf_offset;
8608
8609         root = root->fs_info->csum_root;
8610         if (!extent_buffer_uptodate(root->node)) {
8611                 fprintf(stderr, "No valid csum tree found\n");
8612                 return -ENOENT;
8613         }
8614
8615         btrfs_init_path(&path);
8616         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8617         key.type = BTRFS_EXTENT_CSUM_KEY;
8618         key.offset = 0;
8619         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8620         if (ret < 0) {
8621                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8622                 btrfs_release_path(&path);
8623                 return ret;
8624         }
8625
8626         if (ret > 0 && path.slots[0])
8627                 path.slots[0]--;
8628         ret = 0;
8629
8630         while (1) {
8631                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8632                         ret = btrfs_next_leaf(root, &path);
8633                         if (ret < 0) {
8634                                 fprintf(stderr, "Error going to next leaf "
8635                                         "%d\n", ret);
8636                                 break;
8637                         }
8638                         if (ret)
8639                                 break;
8640                 }
8641                 leaf = path.nodes[0];
8642
8643                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8644                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8645                         path.slots[0]++;
8646                         continue;
8647                 }
8648
8649                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8650                               csum_size) * root->fs_info->sectorsize;
8651                 if (!check_data_csum)
8652                         goto skip_csum_check;
8653                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8654                 ret = check_extent_csums(root, key.offset, data_len,
8655                                          leaf_offset, leaf);
8656                 if (ret)
8657                         break;
8658 skip_csum_check:
8659                 if (!num_bytes) {
8660                         offset = key.offset;
8661                 } else if (key.offset != offset + num_bytes) {
8662                         ret = check_extent_exists(root, offset, num_bytes);
8663                         if (ret) {
8664                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8665                                         "there is no extent record\n",
8666                                         offset, offset+num_bytes);
8667                                 errors++;
8668                         }
8669                         offset = key.offset;
8670                         num_bytes = 0;
8671                 }
8672                 num_bytes += data_len;
8673                 path.slots[0]++;
8674         }
8675
8676         btrfs_release_path(&path);
8677         return errors;
8678 }
8679
8680 static int is_dropped_key(struct btrfs_key *key,
8681                           struct btrfs_key *drop_key) {
8682         if (key->objectid < drop_key->objectid)
8683                 return 1;
8684         else if (key->objectid == drop_key->objectid) {
8685                 if (key->type < drop_key->type)
8686                         return 1;
8687                 else if (key->type == drop_key->type) {
8688                         if (key->offset < drop_key->offset)
8689                                 return 1;
8690                 }
8691         }
8692         return 0;
8693 }
8694
8695 /*
8696  * Here are the rules for FULL_BACKREF.
8697  *
8698  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8699  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8700  *      FULL_BACKREF set.
8701  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8702  *    if it happened after the relocation occurred since we'll have dropped the
8703  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8704  *    have no real way to know for sure.
8705  *
8706  * We process the blocks one root at a time, and we start from the lowest root
8707  * objectid and go to the highest.  So we can just lookup the owner backref for
8708  * the record and if we don't find it then we know it doesn't exist and we have
8709  * a FULL BACKREF.
8710  *
8711  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8712  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8713  * be set or not and then we can check later once we've gathered all the refs.
8714  */
8715 static int calc_extent_flag(struct cache_tree *extent_cache,
8716                            struct extent_buffer *buf,
8717                            struct root_item_record *ri,
8718                            u64 *flags)
8719 {
8720         struct extent_record *rec;
8721         struct cache_extent *cache;
8722         struct tree_backref *tback;
8723         u64 owner = 0;
8724
8725         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8726         /* we have added this extent before */
8727         if (!cache)
8728                 return -ENOENT;
8729
8730         rec = container_of(cache, struct extent_record, cache);
8731
8732         /*
8733          * Except file/reloc tree, we can not have
8734          * FULL BACKREF MODE
8735          */
8736         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8737                 goto normal;
8738         /*
8739          * root node
8740          */
8741         if (buf->start == ri->bytenr)
8742                 goto normal;
8743
8744         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8745                 goto full_backref;
8746
8747         owner = btrfs_header_owner(buf);
8748         if (owner == ri->objectid)
8749                 goto normal;
8750
8751         tback = find_tree_backref(rec, 0, owner);
8752         if (!tback)
8753                 goto full_backref;
8754 normal:
8755         *flags = 0;
8756         if (rec->flag_block_full_backref != FLAG_UNSET &&
8757             rec->flag_block_full_backref != 0)
8758                 rec->bad_full_backref = 1;
8759         return 0;
8760 full_backref:
8761         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8762         if (rec->flag_block_full_backref != FLAG_UNSET &&
8763             rec->flag_block_full_backref != 1)
8764                 rec->bad_full_backref = 1;
8765         return 0;
8766 }
8767
8768 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8769 {
8770         fprintf(stderr, "Invalid key type(");
8771         print_key_type(stderr, 0, key_type);
8772         fprintf(stderr, ") found in root(");
8773         print_objectid(stderr, rootid, 0);
8774         fprintf(stderr, ")\n");
8775 }
8776
8777 /*
8778  * Check if the key is valid with its extent buffer.
8779  *
8780  * This is a early check in case invalid key exists in a extent buffer
8781  * This is not comprehensive yet, but should prevent wrong key/item passed
8782  * further
8783  */
8784 static int check_type_with_root(u64 rootid, u8 key_type)
8785 {
8786         switch (key_type) {
8787         /* Only valid in chunk tree */
8788         case BTRFS_DEV_ITEM_KEY:
8789         case BTRFS_CHUNK_ITEM_KEY:
8790                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8791                         goto err;
8792                 break;
8793         /* valid in csum and log tree */
8794         case BTRFS_CSUM_TREE_OBJECTID:
8795                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8796                       is_fstree(rootid)))
8797                         goto err;
8798                 break;
8799         case BTRFS_EXTENT_ITEM_KEY:
8800         case BTRFS_METADATA_ITEM_KEY:
8801         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8802                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8803                         goto err;
8804                 break;
8805         case BTRFS_ROOT_ITEM_KEY:
8806                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8807                         goto err;
8808                 break;
8809         case BTRFS_DEV_EXTENT_KEY:
8810                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8811                         goto err;
8812                 break;
8813         }
8814         return 0;
8815 err:
8816         report_mismatch_key_root(key_type, rootid);
8817         return -EINVAL;
8818 }
8819
8820 static int run_next_block(struct btrfs_root *root,
8821                           struct block_info *bits,
8822                           int bits_nr,
8823                           u64 *last,
8824                           struct cache_tree *pending,
8825                           struct cache_tree *seen,
8826                           struct cache_tree *reada,
8827                           struct cache_tree *nodes,
8828                           struct cache_tree *extent_cache,
8829                           struct cache_tree *chunk_cache,
8830                           struct rb_root *dev_cache,
8831                           struct block_group_tree *block_group_cache,
8832                           struct device_extent_tree *dev_extent_cache,
8833                           struct root_item_record *ri)
8834 {
8835         struct btrfs_fs_info *fs_info = root->fs_info;
8836         struct extent_buffer *buf;
8837         struct extent_record *rec = NULL;
8838         u64 bytenr;
8839         u32 size;
8840         u64 parent;
8841         u64 owner;
8842         u64 flags;
8843         u64 ptr;
8844         u64 gen = 0;
8845         int ret = 0;
8846         int i;
8847         int nritems;
8848         struct btrfs_key key;
8849         struct cache_extent *cache;
8850         int reada_bits;
8851
8852         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8853                                     bits_nr, &reada_bits);
8854         if (nritems == 0)
8855                 return 1;
8856
8857         if (!reada_bits) {
8858                 for(i = 0; i < nritems; i++) {
8859                         ret = add_cache_extent(reada, bits[i].start,
8860                                                bits[i].size);
8861                         if (ret == -EEXIST)
8862                                 continue;
8863
8864                         /* fixme, get the parent transid */
8865                         readahead_tree_block(fs_info, bits[i].start, 0);
8866                 }
8867         }
8868         *last = bits[0].start;
8869         bytenr = bits[0].start;
8870         size = bits[0].size;
8871
8872         cache = lookup_cache_extent(pending, bytenr, size);
8873         if (cache) {
8874                 remove_cache_extent(pending, cache);
8875                 free(cache);
8876         }
8877         cache = lookup_cache_extent(reada, bytenr, size);
8878         if (cache) {
8879                 remove_cache_extent(reada, cache);
8880                 free(cache);
8881         }
8882         cache = lookup_cache_extent(nodes, bytenr, size);
8883         if (cache) {
8884                 remove_cache_extent(nodes, cache);
8885                 free(cache);
8886         }
8887         cache = lookup_cache_extent(extent_cache, bytenr, size);
8888         if (cache) {
8889                 rec = container_of(cache, struct extent_record, cache);
8890                 gen = rec->parent_generation;
8891         }
8892
8893         /* fixme, get the real parent transid */
8894         buf = read_tree_block(root->fs_info, bytenr, gen);
8895         if (!extent_buffer_uptodate(buf)) {
8896                 record_bad_block_io(root->fs_info,
8897                                     extent_cache, bytenr, size);
8898                 goto out;
8899         }
8900
8901         nritems = btrfs_header_nritems(buf);
8902
8903         flags = 0;
8904         if (!init_extent_tree) {
8905                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8906                                        btrfs_header_level(buf), 1, NULL,
8907                                        &flags);
8908                 if (ret < 0) {
8909                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8910                         if (ret < 0) {
8911                                 fprintf(stderr, "Couldn't calc extent flags\n");
8912                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8913                         }
8914                 }
8915         } else {
8916                 flags = 0;
8917                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8918                 if (ret < 0) {
8919                         fprintf(stderr, "Couldn't calc extent flags\n");
8920                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8921                 }
8922         }
8923
8924         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8925                 if (ri != NULL &&
8926                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8927                     ri->objectid == btrfs_header_owner(buf)) {
8928                         /*
8929                          * Ok we got to this block from it's original owner and
8930                          * we have FULL_BACKREF set.  Relocation can leave
8931                          * converted blocks over so this is altogether possible,
8932                          * however it's not possible if the generation > the
8933                          * last snapshot, so check for this case.
8934                          */
8935                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8936                             btrfs_header_generation(buf) > ri->last_snapshot) {
8937                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
8938                                 rec->bad_full_backref = 1;
8939                         }
8940                 }
8941         } else {
8942                 if (ri != NULL &&
8943                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
8944                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
8945                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8946                         rec->bad_full_backref = 1;
8947                 }
8948         }
8949
8950         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8951                 rec->flag_block_full_backref = 1;
8952                 parent = bytenr;
8953                 owner = 0;
8954         } else {
8955                 rec->flag_block_full_backref = 0;
8956                 parent = 0;
8957                 owner = btrfs_header_owner(buf);
8958         }
8959
8960         ret = check_block(root, extent_cache, buf, flags);
8961         if (ret)
8962                 goto out;
8963
8964         if (btrfs_is_leaf(buf)) {
8965                 btree_space_waste += btrfs_leaf_free_space(root, buf);
8966                 for (i = 0; i < nritems; i++) {
8967                         struct btrfs_file_extent_item *fi;
8968                         btrfs_item_key_to_cpu(buf, &key, i);
8969                         /*
8970                          * Check key type against the leaf owner.
8971                          * Could filter quite a lot of early error if
8972                          * owner is correct
8973                          */
8974                         if (check_type_with_root(btrfs_header_owner(buf),
8975                                                  key.type)) {
8976                                 fprintf(stderr, "ignoring invalid key\n");
8977                                 continue;
8978                         }
8979                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
8980                                 process_extent_item(root, extent_cache, buf,
8981                                                     i);
8982                                 continue;
8983                         }
8984                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8985                                 process_extent_item(root, extent_cache, buf,
8986                                                     i);
8987                                 continue;
8988                         }
8989                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
8990                                 total_csum_bytes +=
8991                                         btrfs_item_size_nr(buf, i);
8992                                 continue;
8993                         }
8994                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
8995                                 process_chunk_item(chunk_cache, &key, buf, i);
8996                                 continue;
8997                         }
8998                         if (key.type == BTRFS_DEV_ITEM_KEY) {
8999                                 process_device_item(dev_cache, &key, buf, i);
9000                                 continue;
9001                         }
9002                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9003                                 process_block_group_item(block_group_cache,
9004                                         &key, buf, i);
9005                                 continue;
9006                         }
9007                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
9008                                 process_device_extent_item(dev_extent_cache,
9009                                         &key, buf, i);
9010                                 continue;
9011
9012                         }
9013                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9014 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9015                                 process_extent_ref_v0(extent_cache, buf, i);
9016 #else
9017                                 BUG();
9018 #endif
9019                                 continue;
9020                         }
9021
9022                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9023                                 ret = add_tree_backref(extent_cache,
9024                                                 key.objectid, 0, key.offset, 0);
9025                                 if (ret < 0)
9026                                         error(
9027                                 "add_tree_backref failed (leaf tree block): %s",
9028                                               strerror(-ret));
9029                                 continue;
9030                         }
9031                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9032                                 ret = add_tree_backref(extent_cache,
9033                                                 key.objectid, key.offset, 0, 0);
9034                                 if (ret < 0)
9035                                         error(
9036                                 "add_tree_backref failed (leaf shared block): %s",
9037                                               strerror(-ret));
9038                                 continue;
9039                         }
9040                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9041                                 struct btrfs_extent_data_ref *ref;
9042                                 ref = btrfs_item_ptr(buf, i,
9043                                                 struct btrfs_extent_data_ref);
9044                                 add_data_backref(extent_cache,
9045                                         key.objectid, 0,
9046                                         btrfs_extent_data_ref_root(buf, ref),
9047                                         btrfs_extent_data_ref_objectid(buf,
9048                                                                        ref),
9049                                         btrfs_extent_data_ref_offset(buf, ref),
9050                                         btrfs_extent_data_ref_count(buf, ref),
9051                                         0, root->fs_info->sectorsize);
9052                                 continue;
9053                         }
9054                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9055                                 struct btrfs_shared_data_ref *ref;
9056                                 ref = btrfs_item_ptr(buf, i,
9057                                                 struct btrfs_shared_data_ref);
9058                                 add_data_backref(extent_cache,
9059                                         key.objectid, key.offset, 0, 0, 0,
9060                                         btrfs_shared_data_ref_count(buf, ref),
9061                                         0, root->fs_info->sectorsize);
9062                                 continue;
9063                         }
9064                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9065                                 struct bad_item *bad;
9066
9067                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9068                                         continue;
9069                                 if (!owner)
9070                                         continue;
9071                                 bad = malloc(sizeof(struct bad_item));
9072                                 if (!bad)
9073                                         continue;
9074                                 INIT_LIST_HEAD(&bad->list);
9075                                 memcpy(&bad->key, &key,
9076                                        sizeof(struct btrfs_key));
9077                                 bad->root_id = owner;
9078                                 list_add_tail(&bad->list, &delete_items);
9079                                 continue;
9080                         }
9081                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9082                                 continue;
9083                         fi = btrfs_item_ptr(buf, i,
9084                                             struct btrfs_file_extent_item);
9085                         if (btrfs_file_extent_type(buf, fi) ==
9086                             BTRFS_FILE_EXTENT_INLINE)
9087                                 continue;
9088                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9089                                 continue;
9090
9091                         data_bytes_allocated +=
9092                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9093                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9094                                 abort();
9095                         }
9096                         data_bytes_referenced +=
9097                                 btrfs_file_extent_num_bytes(buf, fi);
9098                         add_data_backref(extent_cache,
9099                                 btrfs_file_extent_disk_bytenr(buf, fi),
9100                                 parent, owner, key.objectid, key.offset -
9101                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9102                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9103                 }
9104         } else {
9105                 int level;
9106                 struct btrfs_key first_key;
9107
9108                 first_key.objectid = 0;
9109
9110                 if (nritems > 0)
9111                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9112                 level = btrfs_header_level(buf);
9113                 for (i = 0; i < nritems; i++) {
9114                         struct extent_record tmpl;
9115
9116                         ptr = btrfs_node_blockptr(buf, i);
9117                         size = root->fs_info->nodesize;
9118                         btrfs_node_key_to_cpu(buf, &key, i);
9119                         if (ri != NULL) {
9120                                 if ((level == ri->drop_level)
9121                                     && is_dropped_key(&key, &ri->drop_key)) {
9122                                         continue;
9123                                 }
9124                         }
9125
9126                         memset(&tmpl, 0, sizeof(tmpl));
9127                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9128                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9129                         tmpl.start = ptr;
9130                         tmpl.nr = size;
9131                         tmpl.refs = 1;
9132                         tmpl.metadata = 1;
9133                         tmpl.max_size = size;
9134                         ret = add_extent_rec(extent_cache, &tmpl);
9135                         if (ret < 0)
9136                                 goto out;
9137
9138                         ret = add_tree_backref(extent_cache, ptr, parent,
9139                                         owner, 1);
9140                         if (ret < 0) {
9141                                 error(
9142                                 "add_tree_backref failed (non-leaf block): %s",
9143                                       strerror(-ret));
9144                                 continue;
9145                         }
9146
9147                         if (level > 1) {
9148                                 add_pending(nodes, seen, ptr, size);
9149                         } else {
9150                                 add_pending(pending, seen, ptr, size);
9151                         }
9152                 }
9153                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
9154                                       nritems) * sizeof(struct btrfs_key_ptr);
9155         }
9156         total_btree_bytes += buf->len;
9157         if (fs_root_objectid(btrfs_header_owner(buf)))
9158                 total_fs_tree_bytes += buf->len;
9159         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9160                 total_extent_tree_bytes += buf->len;
9161 out:
9162         free_extent_buffer(buf);
9163         return ret;
9164 }
9165
9166 static int add_root_to_pending(struct extent_buffer *buf,
9167                                struct cache_tree *extent_cache,
9168                                struct cache_tree *pending,
9169                                struct cache_tree *seen,
9170                                struct cache_tree *nodes,
9171                                u64 objectid)
9172 {
9173         struct extent_record tmpl;
9174         int ret;
9175
9176         if (btrfs_header_level(buf) > 0)
9177                 add_pending(nodes, seen, buf->start, buf->len);
9178         else
9179                 add_pending(pending, seen, buf->start, buf->len);
9180
9181         memset(&tmpl, 0, sizeof(tmpl));
9182         tmpl.start = buf->start;
9183         tmpl.nr = buf->len;
9184         tmpl.is_root = 1;
9185         tmpl.refs = 1;
9186         tmpl.metadata = 1;
9187         tmpl.max_size = buf->len;
9188         add_extent_rec(extent_cache, &tmpl);
9189
9190         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9191             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9192                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9193                                 0, 1);
9194         else
9195                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9196                                 1);
9197         return ret;
9198 }
9199
9200 /* as we fix the tree, we might be deleting blocks that
9201  * we're tracking for repair.  This hook makes sure we
9202  * remove any backrefs for blocks as we are fixing them.
9203  */
9204 static int free_extent_hook(struct btrfs_trans_handle *trans,
9205                             struct btrfs_root *root,
9206                             u64 bytenr, u64 num_bytes, u64 parent,
9207                             u64 root_objectid, u64 owner, u64 offset,
9208                             int refs_to_drop)
9209 {
9210         struct extent_record *rec;
9211         struct cache_extent *cache;
9212         int is_data;
9213         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9214
9215         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9216         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9217         if (!cache)
9218                 return 0;
9219
9220         rec = container_of(cache, struct extent_record, cache);
9221         if (is_data) {
9222                 struct data_backref *back;
9223                 back = find_data_backref(rec, parent, root_objectid, owner,
9224                                          offset, 1, bytenr, num_bytes);
9225                 if (!back)
9226                         goto out;
9227                 if (back->node.found_ref) {
9228                         back->found_ref -= refs_to_drop;
9229                         if (rec->refs)
9230                                 rec->refs -= refs_to_drop;
9231                 }
9232                 if (back->node.found_extent_tree) {
9233                         back->num_refs -= refs_to_drop;
9234                         if (rec->extent_item_refs)
9235                                 rec->extent_item_refs -= refs_to_drop;
9236                 }
9237                 if (back->found_ref == 0)
9238                         back->node.found_ref = 0;
9239                 if (back->num_refs == 0)
9240                         back->node.found_extent_tree = 0;
9241
9242                 if (!back->node.found_extent_tree && back->node.found_ref) {
9243                         rb_erase(&back->node.node, &rec->backref_tree);
9244                         free(back);
9245                 }
9246         } else {
9247                 struct tree_backref *back;
9248                 back = find_tree_backref(rec, parent, root_objectid);
9249                 if (!back)
9250                         goto out;
9251                 if (back->node.found_ref) {
9252                         if (rec->refs)
9253                                 rec->refs--;
9254                         back->node.found_ref = 0;
9255                 }
9256                 if (back->node.found_extent_tree) {
9257                         if (rec->extent_item_refs)
9258                                 rec->extent_item_refs--;
9259                         back->node.found_extent_tree = 0;
9260                 }
9261                 if (!back->node.found_extent_tree && back->node.found_ref) {
9262                         rb_erase(&back->node.node, &rec->backref_tree);
9263                         free(back);
9264                 }
9265         }
9266         maybe_free_extent_rec(extent_cache, rec);
9267 out:
9268         return 0;
9269 }
9270
9271 static int delete_extent_records(struct btrfs_trans_handle *trans,
9272                                  struct btrfs_root *root,
9273                                  struct btrfs_path *path,
9274                                  u64 bytenr)
9275 {
9276         struct btrfs_key key;
9277         struct btrfs_key found_key;
9278         struct extent_buffer *leaf;
9279         int ret;
9280         int slot;
9281
9282
9283         key.objectid = bytenr;
9284         key.type = (u8)-1;
9285         key.offset = (u64)-1;
9286
9287         while(1) {
9288                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9289                                         &key, path, 0, 1);
9290                 if (ret < 0)
9291                         break;
9292
9293                 if (ret > 0) {
9294                         ret = 0;
9295                         if (path->slots[0] == 0)
9296                                 break;
9297                         path->slots[0]--;
9298                 }
9299                 ret = 0;
9300
9301                 leaf = path->nodes[0];
9302                 slot = path->slots[0];
9303
9304                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9305                 if (found_key.objectid != bytenr)
9306                         break;
9307
9308                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9309                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9310                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9311                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9312                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9313                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9314                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9315                         btrfs_release_path(path);
9316                         if (found_key.type == 0) {
9317                                 if (found_key.offset == 0)
9318                                         break;
9319                                 key.offset = found_key.offset - 1;
9320                                 key.type = found_key.type;
9321                         }
9322                         key.type = found_key.type - 1;
9323                         key.offset = (u64)-1;
9324                         continue;
9325                 }
9326
9327                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9328                         found_key.objectid, found_key.type, found_key.offset);
9329
9330                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9331                 if (ret)
9332                         break;
9333                 btrfs_release_path(path);
9334
9335                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9336                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9337                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9338                                 found_key.offset : root->fs_info->nodesize;
9339
9340                         ret = btrfs_update_block_group(root, bytenr,
9341                                                        bytes, 0, 0);
9342                         if (ret)
9343                                 break;
9344                 }
9345         }
9346
9347         btrfs_release_path(path);
9348         return ret;
9349 }
9350
9351 /*
9352  * for a single backref, this will allocate a new extent
9353  * and add the backref to it.
9354  */
9355 static int record_extent(struct btrfs_trans_handle *trans,
9356                          struct btrfs_fs_info *info,
9357                          struct btrfs_path *path,
9358                          struct extent_record *rec,
9359                          struct extent_backref *back,
9360                          int allocated, u64 flags)
9361 {
9362         int ret = 0;
9363         struct btrfs_root *extent_root = info->extent_root;
9364         struct extent_buffer *leaf;
9365         struct btrfs_key ins_key;
9366         struct btrfs_extent_item *ei;
9367         struct data_backref *dback;
9368         struct btrfs_tree_block_info *bi;
9369
9370         if (!back->is_data)
9371                 rec->max_size = max_t(u64, rec->max_size,
9372                                     info->nodesize);
9373
9374         if (!allocated) {
9375                 u32 item_size = sizeof(*ei);
9376
9377                 if (!back->is_data)
9378                         item_size += sizeof(*bi);
9379
9380                 ins_key.objectid = rec->start;
9381                 ins_key.offset = rec->max_size;
9382                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9383
9384                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9385                                         &ins_key, item_size);
9386                 if (ret)
9387                         goto fail;
9388
9389                 leaf = path->nodes[0];
9390                 ei = btrfs_item_ptr(leaf, path->slots[0],
9391                                     struct btrfs_extent_item);
9392
9393                 btrfs_set_extent_refs(leaf, ei, 0);
9394                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9395
9396                 if (back->is_data) {
9397                         btrfs_set_extent_flags(leaf, ei,
9398                                                BTRFS_EXTENT_FLAG_DATA);
9399                 } else {
9400                         struct btrfs_disk_key copy_key;;
9401
9402                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9403                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9404                                              sizeof(*bi));
9405
9406                         btrfs_set_disk_key_objectid(&copy_key,
9407                                                     rec->info_objectid);
9408                         btrfs_set_disk_key_type(&copy_key, 0);
9409                         btrfs_set_disk_key_offset(&copy_key, 0);
9410
9411                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9412                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9413
9414                         btrfs_set_extent_flags(leaf, ei,
9415                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9416                 }
9417
9418                 btrfs_mark_buffer_dirty(leaf);
9419                 ret = btrfs_update_block_group(extent_root, rec->start,
9420                                                rec->max_size, 1, 0);
9421                 if (ret)
9422                         goto fail;
9423                 btrfs_release_path(path);
9424         }
9425
9426         if (back->is_data) {
9427                 u64 parent;
9428                 int i;
9429
9430                 dback = to_data_backref(back);
9431                 if (back->full_backref)
9432                         parent = dback->parent;
9433                 else
9434                         parent = 0;
9435
9436                 for (i = 0; i < dback->found_ref; i++) {
9437                         /* if parent != 0, we're doing a full backref
9438                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9439                          * just makes the backref allocator create a data
9440                          * backref
9441                          */
9442                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9443                                                    rec->start, rec->max_size,
9444                                                    parent,
9445                                                    dback->root,
9446                                                    parent ?
9447                                                    BTRFS_FIRST_FREE_OBJECTID :
9448                                                    dback->owner,
9449                                                    dback->offset);
9450                         if (ret)
9451                                 break;
9452                 }
9453                 fprintf(stderr, "adding new data backref"
9454                                 " on %llu %s %llu owner %llu"
9455                                 " offset %llu found %d\n",
9456                                 (unsigned long long)rec->start,
9457                                 back->full_backref ?
9458                                 "parent" : "root",
9459                                 back->full_backref ?
9460                                 (unsigned long long)parent :
9461                                 (unsigned long long)dback->root,
9462                                 (unsigned long long)dback->owner,
9463                                 (unsigned long long)dback->offset,
9464                                 dback->found_ref);
9465         } else {
9466                 u64 parent;
9467                 struct tree_backref *tback;
9468
9469                 tback = to_tree_backref(back);
9470                 if (back->full_backref)
9471                         parent = tback->parent;
9472                 else
9473                         parent = 0;
9474
9475                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9476                                            rec->start, rec->max_size,
9477                                            parent, tback->root, 0, 0);
9478                 fprintf(stderr, "adding new tree backref on "
9479                         "start %llu len %llu parent %llu root %llu\n",
9480                         rec->start, rec->max_size, parent, tback->root);
9481         }
9482 fail:
9483         btrfs_release_path(path);
9484         return ret;
9485 }
9486
9487 static struct extent_entry *find_entry(struct list_head *entries,
9488                                        u64 bytenr, u64 bytes)
9489 {
9490         struct extent_entry *entry = NULL;
9491
9492         list_for_each_entry(entry, entries, list) {
9493                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9494                         return entry;
9495         }
9496
9497         return NULL;
9498 }
9499
9500 static struct extent_entry *find_most_right_entry(struct list_head *entries)
9501 {
9502         struct extent_entry *entry, *best = NULL, *prev = NULL;
9503
9504         list_for_each_entry(entry, entries, list) {
9505                 /*
9506                  * If there are as many broken entries as entries then we know
9507                  * not to trust this particular entry.
9508                  */
9509                 if (entry->broken == entry->count)
9510                         continue;
9511
9512                 /*
9513                  * Special case, when there are only two entries and 'best' is
9514                  * the first one
9515                  */
9516                 if (!prev) {
9517                         best = entry;
9518                         prev = entry;
9519                         continue;
9520                 }
9521
9522                 /*
9523                  * If our current entry == best then we can't be sure our best
9524                  * is really the best, so we need to keep searching.
9525                  */
9526                 if (best && best->count == entry->count) {
9527                         prev = entry;
9528                         best = NULL;
9529                         continue;
9530                 }
9531
9532                 /* Prev == entry, not good enough, have to keep searching */
9533                 if (!prev->broken && prev->count == entry->count)
9534                         continue;
9535
9536                 if (!best)
9537                         best = (prev->count > entry->count) ? prev : entry;
9538                 else if (best->count < entry->count)
9539                         best = entry;
9540                 prev = entry;
9541         }
9542
9543         return best;
9544 }
9545
9546 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9547                       struct data_backref *dback, struct extent_entry *entry)
9548 {
9549         struct btrfs_trans_handle *trans;
9550         struct btrfs_root *root;
9551         struct btrfs_file_extent_item *fi;
9552         struct extent_buffer *leaf;
9553         struct btrfs_key key;
9554         u64 bytenr, bytes;
9555         int ret, err;
9556
9557         key.objectid = dback->root;
9558         key.type = BTRFS_ROOT_ITEM_KEY;
9559         key.offset = (u64)-1;
9560         root = btrfs_read_fs_root(info, &key);
9561         if (IS_ERR(root)) {
9562                 fprintf(stderr, "Couldn't find root for our ref\n");
9563                 return -EINVAL;
9564         }
9565
9566         /*
9567          * The backref points to the original offset of the extent if it was
9568          * split, so we need to search down to the offset we have and then walk
9569          * forward until we find the backref we're looking for.
9570          */
9571         key.objectid = dback->owner;
9572         key.type = BTRFS_EXTENT_DATA_KEY;
9573         key.offset = dback->offset;
9574         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9575         if (ret < 0) {
9576                 fprintf(stderr, "Error looking up ref %d\n", ret);
9577                 return ret;
9578         }
9579
9580         while (1) {
9581                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9582                         ret = btrfs_next_leaf(root, path);
9583                         if (ret) {
9584                                 fprintf(stderr, "Couldn't find our ref, next\n");
9585                                 return -EINVAL;
9586                         }
9587                 }
9588                 leaf = path->nodes[0];
9589                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9590                 if (key.objectid != dback->owner ||
9591                     key.type != BTRFS_EXTENT_DATA_KEY) {
9592                         fprintf(stderr, "Couldn't find our ref, search\n");
9593                         return -EINVAL;
9594                 }
9595                 fi = btrfs_item_ptr(leaf, path->slots[0],
9596                                     struct btrfs_file_extent_item);
9597                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9598                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9599
9600                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9601                         break;
9602                 path->slots[0]++;
9603         }
9604
9605         btrfs_release_path(path);
9606
9607         trans = btrfs_start_transaction(root, 1);
9608         if (IS_ERR(trans))
9609                 return PTR_ERR(trans);
9610
9611         /*
9612          * Ok we have the key of the file extent we want to fix, now we can cow
9613          * down to the thing and fix it.
9614          */
9615         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9616         if (ret < 0) {
9617                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9618                         key.objectid, key.type, key.offset, ret);
9619                 goto out;
9620         }
9621         if (ret > 0) {
9622                 fprintf(stderr, "Well that's odd, we just found this key "
9623                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9624                         key.offset);
9625                 ret = -EINVAL;
9626                 goto out;
9627         }
9628         leaf = path->nodes[0];
9629         fi = btrfs_item_ptr(leaf, path->slots[0],
9630                             struct btrfs_file_extent_item);
9631
9632         if (btrfs_file_extent_compression(leaf, fi) &&
9633             dback->disk_bytenr != entry->bytenr) {
9634                 fprintf(stderr, "Ref doesn't match the record start and is "
9635                         "compressed, please take a btrfs-image of this file "
9636                         "system and send it to a btrfs developer so they can "
9637                         "complete this functionality for bytenr %Lu\n",
9638                         dback->disk_bytenr);
9639                 ret = -EINVAL;
9640                 goto out;
9641         }
9642
9643         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9644                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9645         } else if (dback->disk_bytenr > entry->bytenr) {
9646                 u64 off_diff, offset;
9647
9648                 off_diff = dback->disk_bytenr - entry->bytenr;
9649                 offset = btrfs_file_extent_offset(leaf, fi);
9650                 if (dback->disk_bytenr + offset +
9651                     btrfs_file_extent_num_bytes(leaf, fi) >
9652                     entry->bytenr + entry->bytes) {
9653                         fprintf(stderr, "Ref is past the entry end, please "
9654                                 "take a btrfs-image of this file system and "
9655                                 "send it to a btrfs developer, ref %Lu\n",
9656                                 dback->disk_bytenr);
9657                         ret = -EINVAL;
9658                         goto out;
9659                 }
9660                 offset += off_diff;
9661                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9662                 btrfs_set_file_extent_offset(leaf, fi, offset);
9663         } else if (dback->disk_bytenr < entry->bytenr) {
9664                 u64 offset;
9665
9666                 offset = btrfs_file_extent_offset(leaf, fi);
9667                 if (dback->disk_bytenr + offset < entry->bytenr) {
9668                         fprintf(stderr, "Ref is before the entry start, please"
9669                                 " take a btrfs-image of this file system and "
9670                                 "send it to a btrfs developer, ref %Lu\n",
9671                                 dback->disk_bytenr);
9672                         ret = -EINVAL;
9673                         goto out;
9674                 }
9675
9676                 offset += dback->disk_bytenr;
9677                 offset -= entry->bytenr;
9678                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9679                 btrfs_set_file_extent_offset(leaf, fi, offset);
9680         }
9681
9682         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9683
9684         /*
9685          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9686          * only do this if we aren't using compression, otherwise it's a
9687          * trickier case.
9688          */
9689         if (!btrfs_file_extent_compression(leaf, fi))
9690                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9691         else
9692                 printf("ram bytes may be wrong?\n");
9693         btrfs_mark_buffer_dirty(leaf);
9694 out:
9695         err = btrfs_commit_transaction(trans, root);
9696         btrfs_release_path(path);
9697         return ret ? ret : err;
9698 }
9699
9700 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9701                            struct extent_record *rec)
9702 {
9703         struct extent_backref *back, *tmp;
9704         struct data_backref *dback;
9705         struct extent_entry *entry, *best = NULL;
9706         LIST_HEAD(entries);
9707         int nr_entries = 0;
9708         int broken_entries = 0;
9709         int ret = 0;
9710         short mismatch = 0;
9711
9712         /*
9713          * Metadata is easy and the backrefs should always agree on bytenr and
9714          * size, if not we've got bigger issues.
9715          */
9716         if (rec->metadata)
9717                 return 0;
9718
9719         rbtree_postorder_for_each_entry_safe(back, tmp,
9720                                              &rec->backref_tree, node) {
9721                 if (back->full_backref || !back->is_data)
9722                         continue;
9723
9724                 dback = to_data_backref(back);
9725
9726                 /*
9727                  * We only pay attention to backrefs that we found a real
9728                  * backref for.
9729                  */
9730                 if (dback->found_ref == 0)
9731                         continue;
9732
9733                 /*
9734                  * For now we only catch when the bytes don't match, not the
9735                  * bytenr.  We can easily do this at the same time, but I want
9736                  * to have a fs image to test on before we just add repair
9737                  * functionality willy-nilly so we know we won't screw up the
9738                  * repair.
9739                  */
9740
9741                 entry = find_entry(&entries, dback->disk_bytenr,
9742                                    dback->bytes);
9743                 if (!entry) {
9744                         entry = malloc(sizeof(struct extent_entry));
9745                         if (!entry) {
9746                                 ret = -ENOMEM;
9747                                 goto out;
9748                         }
9749                         memset(entry, 0, sizeof(*entry));
9750                         entry->bytenr = dback->disk_bytenr;
9751                         entry->bytes = dback->bytes;
9752                         list_add_tail(&entry->list, &entries);
9753                         nr_entries++;
9754                 }
9755
9756                 /*
9757                  * If we only have on entry we may think the entries agree when
9758                  * in reality they don't so we have to do some extra checking.
9759                  */
9760                 if (dback->disk_bytenr != rec->start ||
9761                     dback->bytes != rec->nr || back->broken)
9762                         mismatch = 1;
9763
9764                 if (back->broken) {
9765                         entry->broken++;
9766                         broken_entries++;
9767                 }
9768
9769                 entry->count++;
9770         }
9771
9772         /* Yay all the backrefs agree, carry on good sir */
9773         if (nr_entries <= 1 && !mismatch)
9774                 goto out;
9775
9776         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9777                 "%Lu\n", rec->start);
9778
9779         /*
9780          * First we want to see if the backrefs can agree amongst themselves who
9781          * is right, so figure out which one of the entries has the highest
9782          * count.
9783          */
9784         best = find_most_right_entry(&entries);
9785
9786         /*
9787          * Ok so we may have an even split between what the backrefs think, so
9788          * this is where we use the extent ref to see what it thinks.
9789          */
9790         if (!best) {
9791                 entry = find_entry(&entries, rec->start, rec->nr);
9792                 if (!entry && (!broken_entries || !rec->found_rec)) {
9793                         fprintf(stderr, "Backrefs don't agree with each other "
9794                                 "and extent record doesn't agree with anybody,"
9795                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9796                                 rec->start, rec->nr);
9797                         ret = -EINVAL;
9798                         goto out;
9799                 } else if (!entry) {
9800                         /*
9801                          * Ok our backrefs were broken, we'll assume this is the
9802                          * correct value and add an entry for this range.
9803                          */
9804                         entry = malloc(sizeof(struct extent_entry));
9805                         if (!entry) {
9806                                 ret = -ENOMEM;
9807                                 goto out;
9808                         }
9809                         memset(entry, 0, sizeof(*entry));
9810                         entry->bytenr = rec->start;
9811                         entry->bytes = rec->nr;
9812                         list_add_tail(&entry->list, &entries);
9813                         nr_entries++;
9814                 }
9815                 entry->count++;
9816                 best = find_most_right_entry(&entries);
9817                 if (!best) {
9818                         fprintf(stderr, "Backrefs and extent record evenly "
9819                                 "split on who is right, this is going to "
9820                                 "require user input to fix bytenr %Lu bytes "
9821                                 "%Lu\n", rec->start, rec->nr);
9822                         ret = -EINVAL;
9823                         goto out;
9824                 }
9825         }
9826
9827         /*
9828          * I don't think this can happen currently as we'll abort() if we catch
9829          * this case higher up, but in case somebody removes that we still can't
9830          * deal with it properly here yet, so just bail out of that's the case.
9831          */
9832         if (best->bytenr != rec->start) {
9833                 fprintf(stderr, "Extent start and backref starts don't match, "
9834                         "please use btrfs-image on this file system and send "
9835                         "it to a btrfs developer so they can make fsck fix "
9836                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9837                         rec->start, rec->nr);
9838                 ret = -EINVAL;
9839                 goto out;
9840         }
9841
9842         /*
9843          * Ok great we all agreed on an extent record, let's go find the real
9844          * references and fix up the ones that don't match.
9845          */
9846         rbtree_postorder_for_each_entry_safe(back, tmp,
9847                                              &rec->backref_tree, node) {
9848                 if (back->full_backref || !back->is_data)
9849                         continue;
9850
9851                 dback = to_data_backref(back);
9852
9853                 /*
9854                  * Still ignoring backrefs that don't have a real ref attached
9855                  * to them.
9856                  */
9857                 if (dback->found_ref == 0)
9858                         continue;
9859
9860                 if (dback->bytes == best->bytes &&
9861                     dback->disk_bytenr == best->bytenr)
9862                         continue;
9863
9864                 ret = repair_ref(info, path, dback, best);
9865                 if (ret)
9866                         goto out;
9867         }
9868
9869         /*
9870          * Ok we messed with the actual refs, which means we need to drop our
9871          * entire cache and go back and rescan.  I know this is a huge pain and
9872          * adds a lot of extra work, but it's the only way to be safe.  Once all
9873          * the backrefs agree we may not need to do anything to the extent
9874          * record itself.
9875          */
9876         ret = -EAGAIN;
9877 out:
9878         while (!list_empty(&entries)) {
9879                 entry = list_entry(entries.next, struct extent_entry, list);
9880                 list_del_init(&entry->list);
9881                 free(entry);
9882         }
9883         return ret;
9884 }
9885
9886 static int process_duplicates(struct cache_tree *extent_cache,
9887                               struct extent_record *rec)
9888 {
9889         struct extent_record *good, *tmp;
9890         struct cache_extent *cache;
9891         int ret;
9892
9893         /*
9894          * If we found a extent record for this extent then return, or if we
9895          * have more than one duplicate we are likely going to need to delete
9896          * something.
9897          */
9898         if (rec->found_rec || rec->num_duplicates > 1)
9899                 return 0;
9900
9901         /* Shouldn't happen but just in case */
9902         BUG_ON(!rec->num_duplicates);
9903
9904         /*
9905          * So this happens if we end up with a backref that doesn't match the
9906          * actual extent entry.  So either the backref is bad or the extent
9907          * entry is bad.  Either way we want to have the extent_record actually
9908          * reflect what we found in the extent_tree, so we need to take the
9909          * duplicate out and use that as the extent_record since the only way we
9910          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9911          */
9912         remove_cache_extent(extent_cache, &rec->cache);
9913
9914         good = to_extent_record(rec->dups.next);
9915         list_del_init(&good->list);
9916         INIT_LIST_HEAD(&good->backrefs);
9917         INIT_LIST_HEAD(&good->dups);
9918         good->cache.start = good->start;
9919         good->cache.size = good->nr;
9920         good->content_checked = 0;
9921         good->owner_ref_checked = 0;
9922         good->num_duplicates = 0;
9923         good->refs = rec->refs;
9924         list_splice_init(&rec->backrefs, &good->backrefs);
9925         while (1) {
9926                 cache = lookup_cache_extent(extent_cache, good->start,
9927                                             good->nr);
9928                 if (!cache)
9929                         break;
9930                 tmp = container_of(cache, struct extent_record, cache);
9931
9932                 /*
9933                  * If we find another overlapping extent and it's found_rec is
9934                  * set then it's a duplicate and we need to try and delete
9935                  * something.
9936                  */
9937                 if (tmp->found_rec || tmp->num_duplicates > 0) {
9938                         if (list_empty(&good->list))
9939                                 list_add_tail(&good->list,
9940                                               &duplicate_extents);
9941                         good->num_duplicates += tmp->num_duplicates + 1;
9942                         list_splice_init(&tmp->dups, &good->dups);
9943                         list_del_init(&tmp->list);
9944                         list_add_tail(&tmp->list, &good->dups);
9945                         remove_cache_extent(extent_cache, &tmp->cache);
9946                         continue;
9947                 }
9948
9949                 /*
9950                  * Ok we have another non extent item backed extent rec, so lets
9951                  * just add it to this extent and carry on like we did above.
9952                  */
9953                 good->refs += tmp->refs;
9954                 list_splice_init(&tmp->backrefs, &good->backrefs);
9955                 remove_cache_extent(extent_cache, &tmp->cache);
9956                 free(tmp);
9957         }
9958         ret = insert_cache_extent(extent_cache, &good->cache);
9959         BUG_ON(ret);
9960         free(rec);
9961         return good->num_duplicates ? 0 : 1;
9962 }
9963
9964 static int delete_duplicate_records(struct btrfs_root *root,
9965                                     struct extent_record *rec)
9966 {
9967         struct btrfs_trans_handle *trans;
9968         LIST_HEAD(delete_list);
9969         struct btrfs_path path;
9970         struct extent_record *tmp, *good, *n;
9971         int nr_del = 0;
9972         int ret = 0, err;
9973         struct btrfs_key key;
9974
9975         btrfs_init_path(&path);
9976
9977         good = rec;
9978         /* Find the record that covers all of the duplicates. */
9979         list_for_each_entry(tmp, &rec->dups, list) {
9980                 if (good->start < tmp->start)
9981                         continue;
9982                 if (good->nr > tmp->nr)
9983                         continue;
9984
9985                 if (tmp->start + tmp->nr < good->start + good->nr) {
9986                         fprintf(stderr, "Ok we have overlapping extents that "
9987                                 "aren't completely covered by each other, this "
9988                                 "is going to require more careful thought.  "
9989                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
9990                                 tmp->start, tmp->nr, good->start, good->nr);
9991                         abort();
9992                 }
9993                 good = tmp;
9994         }
9995
9996         if (good != rec)
9997                 list_add_tail(&rec->list, &delete_list);
9998
9999         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10000                 if (tmp == good)
10001                         continue;
10002                 list_move_tail(&tmp->list, &delete_list);
10003         }
10004
10005         root = root->fs_info->extent_root;
10006         trans = btrfs_start_transaction(root, 1);
10007         if (IS_ERR(trans)) {
10008                 ret = PTR_ERR(trans);
10009                 goto out;
10010         }
10011
10012         list_for_each_entry(tmp, &delete_list, list) {
10013                 if (tmp->found_rec == 0)
10014                         continue;
10015                 key.objectid = tmp->start;
10016                 key.type = BTRFS_EXTENT_ITEM_KEY;
10017                 key.offset = tmp->nr;
10018
10019                 /* Shouldn't happen but just in case */
10020                 if (tmp->metadata) {
10021                         fprintf(stderr, "Well this shouldn't happen, extent "
10022                                 "record overlaps but is metadata? "
10023                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10024                         abort();
10025                 }
10026
10027                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10028                 if (ret) {
10029                         if (ret > 0)
10030                                 ret = -EINVAL;
10031                         break;
10032                 }
10033                 ret = btrfs_del_item(trans, root, &path);
10034                 if (ret)
10035                         break;
10036                 btrfs_release_path(&path);
10037                 nr_del++;
10038         }
10039         err = btrfs_commit_transaction(trans, root);
10040         if (err && !ret)
10041                 ret = err;
10042 out:
10043         while (!list_empty(&delete_list)) {
10044                 tmp = to_extent_record(delete_list.next);
10045                 list_del_init(&tmp->list);
10046                 if (tmp == rec)
10047                         continue;
10048                 free(tmp);
10049         }
10050
10051         while (!list_empty(&rec->dups)) {
10052                 tmp = to_extent_record(rec->dups.next);
10053                 list_del_init(&tmp->list);
10054                 free(tmp);
10055         }
10056
10057         btrfs_release_path(&path);
10058
10059         if (!ret && !nr_del)
10060                 rec->num_duplicates = 0;
10061
10062         return ret ? ret : nr_del;
10063 }
10064
10065 static int find_possible_backrefs(struct btrfs_fs_info *info,
10066                                   struct btrfs_path *path,
10067                                   struct cache_tree *extent_cache,
10068                                   struct extent_record *rec)
10069 {
10070         struct btrfs_root *root;
10071         struct extent_backref *back, *tmp;
10072         struct data_backref *dback;
10073         struct cache_extent *cache;
10074         struct btrfs_file_extent_item *fi;
10075         struct btrfs_key key;
10076         u64 bytenr, bytes;
10077         int ret;
10078
10079         rbtree_postorder_for_each_entry_safe(back, tmp,
10080                                              &rec->backref_tree, node) {
10081                 /* Don't care about full backrefs (poor unloved backrefs) */
10082                 if (back->full_backref || !back->is_data)
10083                         continue;
10084
10085                 dback = to_data_backref(back);
10086
10087                 /* We found this one, we don't need to do a lookup */
10088                 if (dback->found_ref)
10089                         continue;
10090
10091                 key.objectid = dback->root;
10092                 key.type = BTRFS_ROOT_ITEM_KEY;
10093                 key.offset = (u64)-1;
10094
10095                 root = btrfs_read_fs_root(info, &key);
10096
10097                 /* No root, definitely a bad ref, skip */
10098                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10099                         continue;
10100                 /* Other err, exit */
10101                 if (IS_ERR(root))
10102                         return PTR_ERR(root);
10103
10104                 key.objectid = dback->owner;
10105                 key.type = BTRFS_EXTENT_DATA_KEY;
10106                 key.offset = dback->offset;
10107                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10108                 if (ret) {
10109                         btrfs_release_path(path);
10110                         if (ret < 0)
10111                                 return ret;
10112                         /* Didn't find it, we can carry on */
10113                         ret = 0;
10114                         continue;
10115                 }
10116
10117                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10118                                     struct btrfs_file_extent_item);
10119                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10120                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10121                 btrfs_release_path(path);
10122                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10123                 if (cache) {
10124                         struct extent_record *tmp;
10125                         tmp = container_of(cache, struct extent_record, cache);
10126
10127                         /*
10128                          * If we found an extent record for the bytenr for this
10129                          * particular backref then we can't add it to our
10130                          * current extent record.  We only want to add backrefs
10131                          * that don't have a corresponding extent item in the
10132                          * extent tree since they likely belong to this record
10133                          * and we need to fix it if it doesn't match bytenrs.
10134                          */
10135                         if  (tmp->found_rec)
10136                                 continue;
10137                 }
10138
10139                 dback->found_ref += 1;
10140                 dback->disk_bytenr = bytenr;
10141                 dback->bytes = bytes;
10142
10143                 /*
10144                  * Set this so the verify backref code knows not to trust the
10145                  * values in this backref.
10146                  */
10147                 back->broken = 1;
10148         }
10149
10150         return 0;
10151 }
10152
10153 /*
10154  * Record orphan data ref into corresponding root.
10155  *
10156  * Return 0 if the extent item contains data ref and recorded.
10157  * Return 1 if the extent item contains no useful data ref
10158  *   On that case, it may contains only shared_dataref or metadata backref
10159  *   or the file extent exists(this should be handled by the extent bytenr
10160  *   recovery routine)
10161  * Return <0 if something goes wrong.
10162  */
10163 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10164                                       struct extent_record *rec)
10165 {
10166         struct btrfs_key key;
10167         struct btrfs_root *dest_root;
10168         struct extent_backref *back, *tmp;
10169         struct data_backref *dback;
10170         struct orphan_data_extent *orphan;
10171         struct btrfs_path path;
10172         int recorded_data_ref = 0;
10173         int ret = 0;
10174
10175         if (rec->metadata)
10176                 return 1;
10177         btrfs_init_path(&path);
10178         rbtree_postorder_for_each_entry_safe(back, tmp,
10179                                              &rec->backref_tree, node) {
10180                 if (back->full_backref || !back->is_data ||
10181                     !back->found_extent_tree)
10182                         continue;
10183                 dback = to_data_backref(back);
10184                 if (dback->found_ref)
10185                         continue;
10186                 key.objectid = dback->root;
10187                 key.type = BTRFS_ROOT_ITEM_KEY;
10188                 key.offset = (u64)-1;
10189
10190                 dest_root = btrfs_read_fs_root(fs_info, &key);
10191
10192                 /* For non-exist root we just skip it */
10193                 if (IS_ERR(dest_root) || !dest_root)
10194                         continue;
10195
10196                 key.objectid = dback->owner;
10197                 key.type = BTRFS_EXTENT_DATA_KEY;
10198                 key.offset = dback->offset;
10199
10200                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10201                 btrfs_release_path(&path);
10202                 /*
10203                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10204                  * we need to record it for inode/file extent rebuild.
10205                  * For ret > 0, we record it only for file extent rebuild.
10206                  * For ret == 0, the file extent exists but only bytenr
10207                  * mismatch, let the original bytenr fix routine to handle,
10208                  * don't record it.
10209                  */
10210                 if (ret == 0)
10211                         continue;
10212                 ret = 0;
10213                 orphan = malloc(sizeof(*orphan));
10214                 if (!orphan) {
10215                         ret = -ENOMEM;
10216                         goto out;
10217                 }
10218                 INIT_LIST_HEAD(&orphan->list);
10219                 orphan->root = dback->root;
10220                 orphan->objectid = dback->owner;
10221                 orphan->offset = dback->offset;
10222                 orphan->disk_bytenr = rec->cache.start;
10223                 orphan->disk_len = rec->cache.size;
10224                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10225                 recorded_data_ref = 1;
10226         }
10227 out:
10228         btrfs_release_path(&path);
10229         if (!ret)
10230                 return !recorded_data_ref;
10231         else
10232                 return ret;
10233 }
10234
10235 /*
10236  * when an incorrect extent item is found, this will delete
10237  * all of the existing entries for it and recreate them
10238  * based on what the tree scan found.
10239  */
10240 static int fixup_extent_refs(struct btrfs_fs_info *info,
10241                              struct cache_tree *extent_cache,
10242                              struct extent_record *rec)
10243 {
10244         struct btrfs_trans_handle *trans = NULL;
10245         int ret;
10246         struct btrfs_path path;
10247         struct cache_extent *cache;
10248         struct extent_backref *back, *tmp;
10249         int allocated = 0;
10250         u64 flags = 0;
10251
10252         if (rec->flag_block_full_backref)
10253                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10254
10255         btrfs_init_path(&path);
10256         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10257                 /*
10258                  * Sometimes the backrefs themselves are so broken they don't
10259                  * get attached to any meaningful rec, so first go back and
10260                  * check any of our backrefs that we couldn't find and throw
10261                  * them into the list if we find the backref so that
10262                  * verify_backrefs can figure out what to do.
10263                  */
10264                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10265                 if (ret < 0)
10266                         goto out;
10267         }
10268
10269         /* step one, make sure all of the backrefs agree */
10270         ret = verify_backrefs(info, &path, rec);
10271         if (ret < 0)
10272                 goto out;
10273
10274         trans = btrfs_start_transaction(info->extent_root, 1);
10275         if (IS_ERR(trans)) {
10276                 ret = PTR_ERR(trans);
10277                 goto out;
10278         }
10279
10280         /* step two, delete all the existing records */
10281         ret = delete_extent_records(trans, info->extent_root, &path,
10282                                     rec->start);
10283
10284         if (ret < 0)
10285                 goto out;
10286
10287         /* was this block corrupt?  If so, don't add references to it */
10288         cache = lookup_cache_extent(info->corrupt_blocks,
10289                                     rec->start, rec->max_size);
10290         if (cache) {
10291                 ret = 0;
10292                 goto out;
10293         }
10294
10295         /* step three, recreate all the refs we did find */
10296         rbtree_postorder_for_each_entry_safe(back, tmp,
10297                                              &rec->backref_tree, node) {
10298                 /*
10299                  * if we didn't find any references, don't create a
10300                  * new extent record
10301                  */
10302                 if (!back->found_ref)
10303                         continue;
10304
10305                 rec->bad_full_backref = 0;
10306                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10307                 allocated = 1;
10308
10309                 if (ret)
10310                         goto out;
10311         }
10312 out:
10313         if (trans) {
10314                 int err = btrfs_commit_transaction(trans, info->extent_root);
10315                 if (!ret)
10316                         ret = err;
10317         }
10318
10319         if (!ret)
10320                 fprintf(stderr, "Repaired extent references for %llu\n",
10321                                 (unsigned long long)rec->start);
10322
10323         btrfs_release_path(&path);
10324         return ret;
10325 }
10326
10327 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10328                               struct extent_record *rec)
10329 {
10330         struct btrfs_trans_handle *trans;
10331         struct btrfs_root *root = fs_info->extent_root;
10332         struct btrfs_path path;
10333         struct btrfs_extent_item *ei;
10334         struct btrfs_key key;
10335         u64 flags;
10336         int ret = 0;
10337
10338         key.objectid = rec->start;
10339         if (rec->metadata) {
10340                 key.type = BTRFS_METADATA_ITEM_KEY;
10341                 key.offset = rec->info_level;
10342         } else {
10343                 key.type = BTRFS_EXTENT_ITEM_KEY;
10344                 key.offset = rec->max_size;
10345         }
10346
10347         trans = btrfs_start_transaction(root, 0);
10348         if (IS_ERR(trans))
10349                 return PTR_ERR(trans);
10350
10351         btrfs_init_path(&path);
10352         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10353         if (ret < 0) {
10354                 btrfs_release_path(&path);
10355                 btrfs_commit_transaction(trans, root);
10356                 return ret;
10357         } else if (ret) {
10358                 fprintf(stderr, "Didn't find extent for %llu\n",
10359                         (unsigned long long)rec->start);
10360                 btrfs_release_path(&path);
10361                 btrfs_commit_transaction(trans, root);
10362                 return -ENOENT;
10363         }
10364
10365         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10366                             struct btrfs_extent_item);
10367         flags = btrfs_extent_flags(path.nodes[0], ei);
10368         if (rec->flag_block_full_backref) {
10369                 fprintf(stderr, "setting full backref on %llu\n",
10370                         (unsigned long long)key.objectid);
10371                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10372         } else {
10373                 fprintf(stderr, "clearing full backref on %llu\n",
10374                         (unsigned long long)key.objectid);
10375                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10376         }
10377         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10378         btrfs_mark_buffer_dirty(path.nodes[0]);
10379         btrfs_release_path(&path);
10380         ret = btrfs_commit_transaction(trans, root);
10381         if (!ret)
10382                 fprintf(stderr, "Repaired extent flags for %llu\n",
10383                                 (unsigned long long)rec->start);
10384
10385         return ret;
10386 }
10387
10388 /* right now we only prune from the extent allocation tree */
10389 static int prune_one_block(struct btrfs_trans_handle *trans,
10390                            struct btrfs_fs_info *info,
10391                            struct btrfs_corrupt_block *corrupt)
10392 {
10393         int ret;
10394         struct btrfs_path path;
10395         struct extent_buffer *eb;
10396         u64 found;
10397         int slot;
10398         int nritems;
10399         int level = corrupt->level + 1;
10400
10401         btrfs_init_path(&path);
10402 again:
10403         /* we want to stop at the parent to our busted block */
10404         path.lowest_level = level;
10405
10406         ret = btrfs_search_slot(trans, info->extent_root,
10407                                 &corrupt->key, &path, -1, 1);
10408
10409         if (ret < 0)
10410                 goto out;
10411
10412         eb = path.nodes[level];
10413         if (!eb) {
10414                 ret = -ENOENT;
10415                 goto out;
10416         }
10417
10418         /*
10419          * hopefully the search gave us the block we want to prune,
10420          * lets try that first
10421          */
10422         slot = path.slots[level];
10423         found =  btrfs_node_blockptr(eb, slot);
10424         if (found == corrupt->cache.start)
10425                 goto del_ptr;
10426
10427         nritems = btrfs_header_nritems(eb);
10428
10429         /* the search failed, lets scan this node and hope we find it */
10430         for (slot = 0; slot < nritems; slot++) {
10431                 found =  btrfs_node_blockptr(eb, slot);
10432                 if (found == corrupt->cache.start)
10433                         goto del_ptr;
10434         }
10435         /*
10436          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10437          * to this block
10438          */
10439         if (eb == info->extent_root->node) {
10440                 ret = -ENOENT;
10441                 goto out;
10442         } else {
10443                 level++;
10444                 btrfs_release_path(&path);
10445                 goto again;
10446         }
10447
10448 del_ptr:
10449         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10450         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10451
10452 out:
10453         btrfs_release_path(&path);
10454         return ret;
10455 }
10456
10457 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10458 {
10459         struct btrfs_trans_handle *trans = NULL;
10460         struct cache_extent *cache;
10461         struct btrfs_corrupt_block *corrupt;
10462
10463         while (1) {
10464                 cache = search_cache_extent(info->corrupt_blocks, 0);
10465                 if (!cache)
10466                         break;
10467                 if (!trans) {
10468                         trans = btrfs_start_transaction(info->extent_root, 1);
10469                         if (IS_ERR(trans))
10470                                 return PTR_ERR(trans);
10471                 }
10472                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10473                 prune_one_block(trans, info, corrupt);
10474                 remove_cache_extent(info->corrupt_blocks, cache);
10475         }
10476         if (trans)
10477                 return btrfs_commit_transaction(trans, info->extent_root);
10478         return 0;
10479 }
10480
10481 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10482 {
10483         struct btrfs_block_group_cache *cache;
10484         u64 start, end;
10485         int ret;
10486
10487         while (1) {
10488                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10489                                             &start, &end, EXTENT_DIRTY);
10490                 if (ret)
10491                         break;
10492                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10493         }
10494
10495         start = 0;
10496         while (1) {
10497                 cache = btrfs_lookup_first_block_group(fs_info, start);
10498                 if (!cache)
10499                         break;
10500                 if (cache->cached)
10501                         cache->cached = 0;
10502                 start = cache->key.objectid + cache->key.offset;
10503         }
10504 }
10505
10506 static int check_extent_refs(struct btrfs_root *root,
10507                              struct cache_tree *extent_cache)
10508 {
10509         struct extent_record *rec;
10510         struct cache_extent *cache;
10511         int ret = 0;
10512         int had_dups = 0;
10513         int err = 0;
10514
10515         if (repair) {
10516                 /*
10517                  * if we're doing a repair, we have to make sure
10518                  * we don't allocate from the problem extents.
10519                  * In the worst case, this will be all the
10520                  * extents in the FS
10521                  */
10522                 cache = search_cache_extent(extent_cache, 0);
10523                 while(cache) {
10524                         rec = container_of(cache, struct extent_record, cache);
10525                         set_extent_dirty(root->fs_info->excluded_extents,
10526                                          rec->start,
10527                                          rec->start + rec->max_size - 1);
10528                         cache = next_cache_extent(cache);
10529                 }
10530
10531                 /* pin down all the corrupted blocks too */
10532                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10533                 while(cache) {
10534                         set_extent_dirty(root->fs_info->excluded_extents,
10535                                          cache->start,
10536                                          cache->start + cache->size - 1);
10537                         cache = next_cache_extent(cache);
10538                 }
10539                 prune_corrupt_blocks(root->fs_info);
10540                 reset_cached_block_groups(root->fs_info);
10541         }
10542
10543         reset_cached_block_groups(root->fs_info);
10544
10545         /*
10546          * We need to delete any duplicate entries we find first otherwise we
10547          * could mess up the extent tree when we have backrefs that actually
10548          * belong to a different extent item and not the weird duplicate one.
10549          */
10550         while (repair && !list_empty(&duplicate_extents)) {
10551                 rec = to_extent_record(duplicate_extents.next);
10552                 list_del_init(&rec->list);
10553
10554                 /* Sometimes we can find a backref before we find an actual
10555                  * extent, so we need to process it a little bit to see if there
10556                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10557                  * if this is a backref screwup.  If we need to delete stuff
10558                  * process_duplicates() will return 0, otherwise it will return
10559                  * 1 and we
10560                  */
10561                 if (process_duplicates(extent_cache, rec))
10562                         continue;
10563                 ret = delete_duplicate_records(root, rec);
10564                 if (ret < 0)
10565                         return ret;
10566                 /*
10567                  * delete_duplicate_records will return the number of entries
10568                  * deleted, so if it's greater than 0 then we know we actually
10569                  * did something and we need to remove.
10570                  */
10571                 if (ret)
10572                         had_dups = 1;
10573         }
10574
10575         if (had_dups)
10576                 return -EAGAIN;
10577
10578         while(1) {
10579                 int cur_err = 0;
10580                 int fix = 0;
10581
10582                 cache = search_cache_extent(extent_cache, 0);
10583                 if (!cache)
10584                         break;
10585                 rec = container_of(cache, struct extent_record, cache);
10586                 if (rec->num_duplicates) {
10587                         fprintf(stderr, "extent item %llu has multiple extent "
10588                                 "items\n", (unsigned long long)rec->start);
10589                         cur_err = 1;
10590                 }
10591
10592                 if (rec->refs != rec->extent_item_refs) {
10593                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10594                                 (unsigned long long)rec->start,
10595                                 (unsigned long long)rec->nr);
10596                         fprintf(stderr, "extent item %llu, found %llu\n",
10597                                 (unsigned long long)rec->extent_item_refs,
10598                                 (unsigned long long)rec->refs);
10599                         ret = record_orphan_data_extents(root->fs_info, rec);
10600                         if (ret < 0)
10601                                 goto repair_abort;
10602                         fix = ret;
10603                         cur_err = 1;
10604                 }
10605                 if (all_backpointers_checked(rec, 1)) {
10606                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10607                                 (unsigned long long)rec->start,
10608                                 (unsigned long long)rec->nr);
10609                         fix = 1;
10610                         cur_err = 1;
10611                 }
10612                 if (!rec->owner_ref_checked) {
10613                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10614                                 (unsigned long long)rec->start,
10615                                 (unsigned long long)rec->nr);
10616                         fix = 1;
10617                         cur_err = 1;
10618                 }
10619
10620                 if (repair && fix) {
10621                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10622                         if (ret)
10623                                 goto repair_abort;
10624                 }
10625
10626
10627                 if (rec->bad_full_backref) {
10628                         fprintf(stderr, "bad full backref, on [%llu]\n",
10629                                 (unsigned long long)rec->start);
10630                         if (repair) {
10631                                 ret = fixup_extent_flags(root->fs_info, rec);
10632                                 if (ret)
10633                                         goto repair_abort;
10634                                 fix = 1;
10635                         }
10636                         cur_err = 1;
10637                 }
10638                 /*
10639                  * Although it's not a extent ref's problem, we reuse this
10640                  * routine for error reporting.
10641                  * No repair function yet.
10642                  */
10643                 if (rec->crossing_stripes) {
10644                         fprintf(stderr,
10645                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10646                                 rec->start, rec->start + rec->max_size);
10647                         cur_err = 1;
10648                 }
10649
10650                 if (rec->wrong_chunk_type) {
10651                         fprintf(stderr,
10652                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10653                                 rec->start, rec->start + rec->max_size);
10654                         cur_err = 1;
10655                 }
10656
10657                 err = cur_err;
10658                 remove_cache_extent(extent_cache, cache);
10659                 free_all_extent_backrefs(rec);
10660                 if (!init_extent_tree && repair && (!cur_err || fix))
10661                         clear_extent_dirty(root->fs_info->excluded_extents,
10662                                            rec->start,
10663                                            rec->start + rec->max_size - 1);
10664                 free(rec);
10665         }
10666 repair_abort:
10667         if (repair) {
10668                 if (ret && ret != -EAGAIN) {
10669                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10670                         exit(1);
10671                 } else if (!ret) {
10672                         struct btrfs_trans_handle *trans;
10673
10674                         root = root->fs_info->extent_root;
10675                         trans = btrfs_start_transaction(root, 1);
10676                         if (IS_ERR(trans)) {
10677                                 ret = PTR_ERR(trans);
10678                                 goto repair_abort;
10679                         }
10680
10681                         ret = btrfs_fix_block_accounting(trans, root);
10682                         if (ret)
10683                                 goto repair_abort;
10684                         ret = btrfs_commit_transaction(trans, root);
10685                         if (ret)
10686                                 goto repair_abort;
10687                 }
10688                 return ret;
10689         }
10690
10691         if (err)
10692                 err = -EIO;
10693         return err;
10694 }
10695
10696 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10697 {
10698         u64 stripe_size;
10699
10700         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10701                 stripe_size = length;
10702                 stripe_size /= num_stripes;
10703         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10704                 stripe_size = length * 2;
10705                 stripe_size /= num_stripes;
10706         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10707                 stripe_size = length;
10708                 stripe_size /= (num_stripes - 1);
10709         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10710                 stripe_size = length;
10711                 stripe_size /= (num_stripes - 2);
10712         } else {
10713                 stripe_size = length;
10714         }
10715         return stripe_size;
10716 }
10717
10718 /*
10719  * Check the chunk with its block group/dev list ref:
10720  * Return 0 if all refs seems valid.
10721  * Return 1 if part of refs seems valid, need later check for rebuild ref
10722  * like missing block group and needs to search extent tree to rebuild them.
10723  * Return -1 if essential refs are missing and unable to rebuild.
10724  */
10725 static int check_chunk_refs(struct chunk_record *chunk_rec,
10726                             struct block_group_tree *block_group_cache,
10727                             struct device_extent_tree *dev_extent_cache,
10728                             int silent)
10729 {
10730         struct cache_extent *block_group_item;
10731         struct block_group_record *block_group_rec;
10732         struct cache_extent *dev_extent_item;
10733         struct device_extent_record *dev_extent_rec;
10734         u64 devid;
10735         u64 offset;
10736         u64 length;
10737         int metadump_v2 = 0;
10738         int i;
10739         int ret = 0;
10740
10741         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10742                                                chunk_rec->offset,
10743                                                chunk_rec->length);
10744         if (block_group_item) {
10745                 block_group_rec = container_of(block_group_item,
10746                                                struct block_group_record,
10747                                                cache);
10748                 if (chunk_rec->length != block_group_rec->offset ||
10749                     chunk_rec->offset != block_group_rec->objectid ||
10750                     (!metadump_v2 &&
10751                      chunk_rec->type_flags != block_group_rec->flags)) {
10752                         if (!silent)
10753                                 fprintf(stderr,
10754                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10755                                         chunk_rec->objectid,
10756                                         chunk_rec->type,
10757                                         chunk_rec->offset,
10758                                         chunk_rec->length,
10759                                         chunk_rec->offset,
10760                                         chunk_rec->type_flags,
10761                                         block_group_rec->objectid,
10762                                         block_group_rec->type,
10763                                         block_group_rec->offset,
10764                                         block_group_rec->offset,
10765                                         block_group_rec->objectid,
10766                                         block_group_rec->flags);
10767                         ret = -1;
10768                 } else {
10769                         list_del_init(&block_group_rec->list);
10770                         chunk_rec->bg_rec = block_group_rec;
10771                 }
10772         } else {
10773                 if (!silent)
10774                         fprintf(stderr,
10775                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10776                                 chunk_rec->objectid,
10777                                 chunk_rec->type,
10778                                 chunk_rec->offset,
10779                                 chunk_rec->length,
10780                                 chunk_rec->offset,
10781                                 chunk_rec->type_flags);
10782                 ret = 1;
10783         }
10784
10785         if (metadump_v2)
10786                 return ret;
10787
10788         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10789                                     chunk_rec->num_stripes);
10790         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10791                 devid = chunk_rec->stripes[i].devid;
10792                 offset = chunk_rec->stripes[i].offset;
10793                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10794                                                        devid, offset, length);
10795                 if (dev_extent_item) {
10796                         dev_extent_rec = container_of(dev_extent_item,
10797                                                 struct device_extent_record,
10798                                                 cache);
10799                         if (dev_extent_rec->objectid != devid ||
10800                             dev_extent_rec->offset != offset ||
10801                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10802                             dev_extent_rec->length != length) {
10803                                 if (!silent)
10804                                         fprintf(stderr,
10805                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10806                                                 chunk_rec->objectid,
10807                                                 chunk_rec->type,
10808                                                 chunk_rec->offset,
10809                                                 chunk_rec->stripes[i].devid,
10810                                                 chunk_rec->stripes[i].offset,
10811                                                 dev_extent_rec->objectid,
10812                                                 dev_extent_rec->offset,
10813                                                 dev_extent_rec->length);
10814                                 ret = -1;
10815                         } else {
10816                                 list_move(&dev_extent_rec->chunk_list,
10817                                           &chunk_rec->dextents);
10818                         }
10819                 } else {
10820                         if (!silent)
10821                                 fprintf(stderr,
10822                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10823                                         chunk_rec->objectid,
10824                                         chunk_rec->type,
10825                                         chunk_rec->offset,
10826                                         chunk_rec->stripes[i].devid,
10827                                         chunk_rec->stripes[i].offset);
10828                         ret = -1;
10829                 }
10830         }
10831         return ret;
10832 }
10833
10834 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10835 int check_chunks(struct cache_tree *chunk_cache,
10836                  struct block_group_tree *block_group_cache,
10837                  struct device_extent_tree *dev_extent_cache,
10838                  struct list_head *good, struct list_head *bad,
10839                  struct list_head *rebuild, int silent)
10840 {
10841         struct cache_extent *chunk_item;
10842         struct chunk_record *chunk_rec;
10843         struct block_group_record *bg_rec;
10844         struct device_extent_record *dext_rec;
10845         int err;
10846         int ret = 0;
10847
10848         chunk_item = first_cache_extent(chunk_cache);
10849         while (chunk_item) {
10850                 chunk_rec = container_of(chunk_item, struct chunk_record,
10851                                          cache);
10852                 err = check_chunk_refs(chunk_rec, block_group_cache,
10853                                        dev_extent_cache, silent);
10854                 if (err < 0)
10855                         ret = err;
10856                 if (err == 0 && good)
10857                         list_add_tail(&chunk_rec->list, good);
10858                 if (err > 0 && rebuild)
10859                         list_add_tail(&chunk_rec->list, rebuild);
10860                 if (err < 0 && bad)
10861                         list_add_tail(&chunk_rec->list, bad);
10862                 chunk_item = next_cache_extent(chunk_item);
10863         }
10864
10865         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10866                 if (!silent)
10867                         fprintf(stderr,
10868                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10869                                 bg_rec->objectid,
10870                                 bg_rec->offset,
10871                                 bg_rec->flags);
10872                 if (!ret)
10873                         ret = 1;
10874         }
10875
10876         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10877                             chunk_list) {
10878                 if (!silent)
10879                         fprintf(stderr,
10880                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10881                                 dext_rec->objectid,
10882                                 dext_rec->offset,
10883                                 dext_rec->length);
10884                 if (!ret)
10885                         ret = 1;
10886         }
10887         return ret;
10888 }
10889
10890
10891 static int check_device_used(struct device_record *dev_rec,
10892                              struct device_extent_tree *dext_cache)
10893 {
10894         struct cache_extent *cache;
10895         struct device_extent_record *dev_extent_rec;
10896         u64 total_byte = 0;
10897
10898         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10899         while (cache) {
10900                 dev_extent_rec = container_of(cache,
10901                                               struct device_extent_record,
10902                                               cache);
10903                 if (dev_extent_rec->objectid != dev_rec->devid)
10904                         break;
10905
10906                 list_del_init(&dev_extent_rec->device_list);
10907                 total_byte += dev_extent_rec->length;
10908                 cache = next_cache_extent(cache);
10909         }
10910
10911         if (total_byte != dev_rec->byte_used) {
10912                 fprintf(stderr,
10913                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10914                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10915                         dev_rec->type, dev_rec->offset);
10916                 return -1;
10917         } else {
10918                 return 0;
10919         }
10920 }
10921
10922 /*
10923  * Extra (optional) check for dev_item size to report possbile problem on a new
10924  * kernel.
10925  */
10926 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
10927 {
10928         if (!IS_ALIGNED(total_bytes, sectorsize)) {
10929                 warning(
10930 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
10931                         devid, total_bytes, sectorsize);
10932                 warning(
10933 "this is OK for older kernel, but may cause kernel warning for newer kernels");
10934                 warning("this can be fixed by 'btrfs rescue fix-device-size'");
10935         }
10936 }
10937
10938 /*
10939  * Unlike device size alignment check above, some super total_bytes check
10940  * failure can lead to mount failure for newer kernel.
10941  *
10942  * So this function will return the error for a fatal super total_bytes problem.
10943  */
10944 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
10945 {
10946         struct btrfs_device *dev;
10947         struct list_head *dev_list = &fs_info->fs_devices->devices;
10948         u64 total_bytes = 0;
10949         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
10950
10951         list_for_each_entry(dev, dev_list, dev_list)
10952                 total_bytes += dev->total_bytes;
10953
10954         /* Important check, which can cause unmountable fs */
10955         if (super_bytes < total_bytes) {
10956                 error("super total bytes %llu smaller than real device(s) size %llu",
10957                         super_bytes, total_bytes);
10958                 error("mounting this fs may fail for newer kernels");
10959                 error("this can be fixed by 'btrfs rescue fix-device-size'");
10960                 return false;
10961         }
10962
10963         /*
10964          * Optional check, just to make everything aligned and match with each
10965          * other.
10966          *
10967          * For a btrfs-image restored fs, we don't need to check it anyway.
10968          */
10969         if (btrfs_super_flags(fs_info->super_copy) &
10970             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
10971                 return true;
10972         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
10973             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
10974             super_bytes != total_bytes) {
10975                 warning("minor unaligned/mismatch device size detected");
10976                 warning(
10977                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
10978         }
10979         return true;
10980 }
10981
10982 /* check btrfs_dev_item -> btrfs_dev_extent */
10983 static int check_devices(struct rb_root *dev_cache,
10984                          struct device_extent_tree *dev_extent_cache)
10985 {
10986         struct rb_node *dev_node;
10987         struct device_record *dev_rec;
10988         struct device_extent_record *dext_rec;
10989         int err;
10990         int ret = 0;
10991
10992         dev_node = rb_first(dev_cache);
10993         while (dev_node) {
10994                 dev_rec = container_of(dev_node, struct device_record, node);
10995                 err = check_device_used(dev_rec, dev_extent_cache);
10996                 if (err)
10997                         ret = err;
10998
10999                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11000                                          global_info->sectorsize);
11001                 dev_node = rb_next(dev_node);
11002         }
11003         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11004                             device_list) {
11005                 fprintf(stderr,
11006                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11007                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
11008                 if (!ret)
11009                         ret = 1;
11010         }
11011         return ret;
11012 }
11013
11014 static int add_root_item_to_list(struct list_head *head,
11015                                   u64 objectid, u64 bytenr, u64 last_snapshot,
11016                                   u8 level, u8 drop_level,
11017                                   struct btrfs_key *drop_key)
11018 {
11019
11020         struct root_item_record *ri_rec;
11021         ri_rec = malloc(sizeof(*ri_rec));
11022         if (!ri_rec)
11023                 return -ENOMEM;
11024         ri_rec->bytenr = bytenr;
11025         ri_rec->objectid = objectid;
11026         ri_rec->level = level;
11027         ri_rec->drop_level = drop_level;
11028         ri_rec->last_snapshot = last_snapshot;
11029         if (drop_key)
11030                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11031         list_add_tail(&ri_rec->list, head);
11032
11033         return 0;
11034 }
11035
11036 static void free_root_item_list(struct list_head *list)
11037 {
11038         struct root_item_record *ri_rec;
11039
11040         while (!list_empty(list)) {
11041                 ri_rec = list_first_entry(list, struct root_item_record,
11042                                           list);
11043                 list_del_init(&ri_rec->list);
11044                 free(ri_rec);
11045         }
11046 }
11047
11048 static int deal_root_from_list(struct list_head *list,
11049                                struct btrfs_root *root,
11050                                struct block_info *bits,
11051                                int bits_nr,
11052                                struct cache_tree *pending,
11053                                struct cache_tree *seen,
11054                                struct cache_tree *reada,
11055                                struct cache_tree *nodes,
11056                                struct cache_tree *extent_cache,
11057                                struct cache_tree *chunk_cache,
11058                                struct rb_root *dev_cache,
11059                                struct block_group_tree *block_group_cache,
11060                                struct device_extent_tree *dev_extent_cache)
11061 {
11062         int ret = 0;
11063         u64 last;
11064
11065         while (!list_empty(list)) {
11066                 struct root_item_record *rec;
11067                 struct extent_buffer *buf;
11068                 rec = list_entry(list->next,
11069                                  struct root_item_record, list);
11070                 last = 0;
11071                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11072                 if (!extent_buffer_uptodate(buf)) {
11073                         free_extent_buffer(buf);
11074                         ret = -EIO;
11075                         break;
11076                 }
11077                 ret = add_root_to_pending(buf, extent_cache, pending,
11078                                     seen, nodes, rec->objectid);
11079                 if (ret < 0)
11080                         break;
11081                 /*
11082                  * To rebuild extent tree, we need deal with snapshot
11083                  * one by one, otherwise we deal with node firstly which
11084                  * can maximize readahead.
11085                  */
11086                 while (1) {
11087                         ret = run_next_block(root, bits, bits_nr, &last,
11088                                              pending, seen, reada, nodes,
11089                                              extent_cache, chunk_cache,
11090                                              dev_cache, block_group_cache,
11091                                              dev_extent_cache, rec);
11092                         if (ret != 0)
11093                                 break;
11094                 }
11095                 free_extent_buffer(buf);
11096                 list_del(&rec->list);
11097                 free(rec);
11098                 if (ret < 0)
11099                         break;
11100         }
11101         while (ret >= 0) {
11102                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11103                                      reada, nodes, extent_cache, chunk_cache,
11104                                      dev_cache, block_group_cache,
11105                                      dev_extent_cache, NULL);
11106                 if (ret != 0) {
11107                         if (ret > 0)
11108                                 ret = 0;
11109                         break;
11110                 }
11111         }
11112         return ret;
11113 }
11114
11115 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11116 {
11117         struct rb_root dev_cache;
11118         struct cache_tree chunk_cache;
11119         struct block_group_tree block_group_cache;
11120         struct device_extent_tree dev_extent_cache;
11121         struct cache_tree extent_cache;
11122         struct cache_tree seen;
11123         struct cache_tree pending;
11124         struct cache_tree reada;
11125         struct cache_tree nodes;
11126         struct extent_io_tree excluded_extents;
11127         struct cache_tree corrupt_blocks;
11128         struct btrfs_path path;
11129         struct btrfs_key key;
11130         struct btrfs_key found_key;
11131         int ret, err = 0;
11132         struct block_info *bits;
11133         int bits_nr;
11134         struct extent_buffer *leaf;
11135         int slot;
11136         struct btrfs_root_item ri;
11137         struct list_head dropping_trees;
11138         struct list_head normal_trees;
11139         struct btrfs_root *root1;
11140         struct btrfs_root *root;
11141         u64 objectid;
11142         u8 level;
11143
11144         root = fs_info->fs_root;
11145         dev_cache = RB_ROOT;
11146         cache_tree_init(&chunk_cache);
11147         block_group_tree_init(&block_group_cache);
11148         device_extent_tree_init(&dev_extent_cache);
11149
11150         cache_tree_init(&extent_cache);
11151         cache_tree_init(&seen);
11152         cache_tree_init(&pending);
11153         cache_tree_init(&nodes);
11154         cache_tree_init(&reada);
11155         cache_tree_init(&corrupt_blocks);
11156         extent_io_tree_init(&excluded_extents);
11157         INIT_LIST_HEAD(&dropping_trees);
11158         INIT_LIST_HEAD(&normal_trees);
11159
11160         if (repair) {
11161                 fs_info->excluded_extents = &excluded_extents;
11162                 fs_info->fsck_extent_cache = &extent_cache;
11163                 fs_info->free_extent_hook = free_extent_hook;
11164                 fs_info->corrupt_blocks = &corrupt_blocks;
11165         }
11166
11167         bits_nr = 1024;
11168         bits = malloc(bits_nr * sizeof(struct block_info));
11169         if (!bits) {
11170                 perror("malloc");
11171                 exit(1);
11172         }
11173
11174         if (ctx.progress_enabled) {
11175                 ctx.tp = TASK_EXTENTS;
11176                 task_start(ctx.info);
11177         }
11178
11179 again:
11180         root1 = fs_info->tree_root;
11181         level = btrfs_header_level(root1->node);
11182         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11183                                     root1->node->start, 0, level, 0, NULL);
11184         if (ret < 0)
11185                 goto out;
11186         root1 = fs_info->chunk_root;
11187         level = btrfs_header_level(root1->node);
11188         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11189                                     root1->node->start, 0, level, 0, NULL);
11190         if (ret < 0)
11191                 goto out;
11192         btrfs_init_path(&path);
11193         key.offset = 0;
11194         key.objectid = 0;
11195         key.type = BTRFS_ROOT_ITEM_KEY;
11196         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11197         if (ret < 0)
11198                 goto out;
11199         while(1) {
11200                 leaf = path.nodes[0];
11201                 slot = path.slots[0];
11202                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11203                         ret = btrfs_next_leaf(root, &path);
11204                         if (ret != 0)
11205                                 break;
11206                         leaf = path.nodes[0];
11207                         slot = path.slots[0];
11208                 }
11209                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11210                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11211                         unsigned long offset;
11212                         u64 last_snapshot;
11213
11214                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11215                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11216                         last_snapshot = btrfs_root_last_snapshot(&ri);
11217                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11218                                 level = btrfs_root_level(&ri);
11219                                 ret = add_root_item_to_list(&normal_trees,
11220                                                 found_key.objectid,
11221                                                 btrfs_root_bytenr(&ri),
11222                                                 last_snapshot, level,
11223                                                 0, NULL);
11224                                 if (ret < 0)
11225                                         goto out;
11226                         } else {
11227                                 level = btrfs_root_level(&ri);
11228                                 objectid = found_key.objectid;
11229                                 btrfs_disk_key_to_cpu(&found_key,
11230                                                       &ri.drop_progress);
11231                                 ret = add_root_item_to_list(&dropping_trees,
11232                                                 objectid,
11233                                                 btrfs_root_bytenr(&ri),
11234                                                 last_snapshot, level,
11235                                                 ri.drop_level, &found_key);
11236                                 if (ret < 0)
11237                                         goto out;
11238                         }
11239                 }
11240                 path.slots[0]++;
11241         }
11242         btrfs_release_path(&path);
11243
11244         /*
11245          * check_block can return -EAGAIN if it fixes something, please keep
11246          * this in mind when dealing with return values from these functions, if
11247          * we get -EAGAIN we want to fall through and restart the loop.
11248          */
11249         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11250                                   &seen, &reada, &nodes, &extent_cache,
11251                                   &chunk_cache, &dev_cache, &block_group_cache,
11252                                   &dev_extent_cache);
11253         if (ret < 0) {
11254                 if (ret == -EAGAIN)
11255                         goto loop;
11256                 goto out;
11257         }
11258         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11259                                   &pending, &seen, &reada, &nodes,
11260                                   &extent_cache, &chunk_cache, &dev_cache,
11261                                   &block_group_cache, &dev_extent_cache);
11262         if (ret < 0) {
11263                 if (ret == -EAGAIN)
11264                         goto loop;
11265                 goto out;
11266         }
11267
11268         ret = check_chunks(&chunk_cache, &block_group_cache,
11269                            &dev_extent_cache, NULL, NULL, NULL, 0);
11270         if (ret) {
11271                 if (ret == -EAGAIN)
11272                         goto loop;
11273                 err = ret;
11274         }
11275
11276         ret = check_extent_refs(root, &extent_cache);
11277         if (ret < 0) {
11278                 if (ret == -EAGAIN)
11279                         goto loop;
11280                 goto out;
11281         }
11282
11283         ret = check_devices(&dev_cache, &dev_extent_cache);
11284         if (ret && err)
11285                 ret = err;
11286
11287 out:
11288         task_stop(ctx.info);
11289         if (repair) {
11290                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11291                 extent_io_tree_cleanup(&excluded_extents);
11292                 fs_info->fsck_extent_cache = NULL;
11293                 fs_info->free_extent_hook = NULL;
11294                 fs_info->corrupt_blocks = NULL;
11295                 fs_info->excluded_extents = NULL;
11296         }
11297         free(bits);
11298         free_chunk_cache_tree(&chunk_cache);
11299         free_device_cache_tree(&dev_cache);
11300         free_block_group_tree(&block_group_cache);
11301         free_device_extent_tree(&dev_extent_cache);
11302         free_extent_cache_tree(&seen);
11303         free_extent_cache_tree(&pending);
11304         free_extent_cache_tree(&reada);
11305         free_extent_cache_tree(&nodes);
11306         free_root_item_list(&normal_trees);
11307         free_root_item_list(&dropping_trees);
11308         return ret;
11309 loop:
11310         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11311         free_extent_cache_tree(&seen);
11312         free_extent_cache_tree(&pending);
11313         free_extent_cache_tree(&reada);
11314         free_extent_cache_tree(&nodes);
11315         free_chunk_cache_tree(&chunk_cache);
11316         free_block_group_tree(&block_group_cache);
11317         free_device_cache_tree(&dev_cache);
11318         free_device_extent_tree(&dev_extent_cache);
11319         free_extent_record_cache(&extent_cache);
11320         free_root_item_list(&normal_trees);
11321         free_root_item_list(&dropping_trees);
11322         extent_io_tree_cleanup(&excluded_extents);
11323         goto again;
11324 }
11325
11326 static int check_extent_inline_ref(struct extent_buffer *eb,
11327                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11328 {
11329         int ret;
11330         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11331
11332         switch (type) {
11333         case BTRFS_TREE_BLOCK_REF_KEY:
11334         case BTRFS_EXTENT_DATA_REF_KEY:
11335         case BTRFS_SHARED_BLOCK_REF_KEY:
11336         case BTRFS_SHARED_DATA_REF_KEY:
11337                 ret = 0;
11338                 break;
11339         default:
11340                 error("extent[%llu %u %llu] has unknown ref type: %d",
11341                       key->objectid, key->type, key->offset, type);
11342                 ret = UNKNOWN_TYPE;
11343                 break;
11344         }
11345
11346         return ret;
11347 }
11348
11349 /*
11350  * Check backrefs of a tree block given by @bytenr or @eb.
11351  *
11352  * @root:       the root containing the @bytenr or @eb
11353  * @eb:         tree block extent buffer, can be NULL
11354  * @bytenr:     bytenr of the tree block to search
11355  * @level:      tree level of the tree block
11356  * @owner:      owner of the tree block
11357  *
11358  * Return >0 for any error found and output error message
11359  * Return 0 for no error found
11360  */
11361 static int check_tree_block_ref(struct btrfs_root *root,
11362                                 struct extent_buffer *eb, u64 bytenr,
11363                                 int level, u64 owner, struct node_refs *nrefs)
11364 {
11365         struct btrfs_key key;
11366         struct btrfs_root *extent_root = root->fs_info->extent_root;
11367         struct btrfs_path path;
11368         struct btrfs_extent_item *ei;
11369         struct btrfs_extent_inline_ref *iref;
11370         struct extent_buffer *leaf;
11371         unsigned long end;
11372         unsigned long ptr;
11373         int slot;
11374         int skinny_level;
11375         int root_level = btrfs_header_level(root->node);
11376         int type;
11377         u32 nodesize = root->fs_info->nodesize;
11378         u32 item_size;
11379         u64 offset;
11380         int found_ref = 0;
11381         int err = 0;
11382         int ret;
11383         int strict = 1;
11384         int parent = 0;
11385
11386         btrfs_init_path(&path);
11387         key.objectid = bytenr;
11388         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11389                 key.type = BTRFS_METADATA_ITEM_KEY;
11390         else
11391                 key.type = BTRFS_EXTENT_ITEM_KEY;
11392         key.offset = (u64)-1;
11393
11394         /* Search for the backref in extent tree */
11395         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11396         if (ret < 0) {
11397                 err |= BACKREF_MISSING;
11398                 goto out;
11399         }
11400         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11401         if (ret) {
11402                 err |= BACKREF_MISSING;
11403                 goto out;
11404         }
11405
11406         leaf = path.nodes[0];
11407         slot = path.slots[0];
11408         btrfs_item_key_to_cpu(leaf, &key, slot);
11409
11410         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11411
11412         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11413                 skinny_level = (int)key.offset;
11414                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11415         } else {
11416                 struct btrfs_tree_block_info *info;
11417
11418                 info = (struct btrfs_tree_block_info *)(ei + 1);
11419                 skinny_level = btrfs_tree_block_level(leaf, info);
11420                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11421         }
11422
11423
11424         if (eb) {
11425                 u64 header_gen;
11426                 u64 extent_gen;
11427
11428                 /*
11429                  * Due to the feature of shared tree blocks, if the upper node
11430                  * is a fs root or shared node, the extent of checked node may
11431                  * not be updated until the next CoW.
11432                  */
11433                 if (nrefs)
11434                         strict = should_check_extent_strictly(root, nrefs,
11435                                         level);
11436                 if (!(btrfs_extent_flags(leaf, ei) &
11437                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11438                         error(
11439                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11440                                 key.objectid, nodesize,
11441                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11442                         err = BACKREF_MISMATCH;
11443                 }
11444                 header_gen = btrfs_header_generation(eb);
11445                 extent_gen = btrfs_extent_generation(leaf, ei);
11446                 if (header_gen != extent_gen) {
11447                         error(
11448         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11449                                 key.objectid, nodesize, header_gen,
11450                                 extent_gen);
11451                         err = BACKREF_MISMATCH;
11452                 }
11453                 if (level != skinny_level) {
11454                         error(
11455                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11456                                 key.objectid, nodesize, level, skinny_level);
11457                         err = BACKREF_MISMATCH;
11458                 }
11459                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11460                         error(
11461                         "extent[%llu %u] is referred by other roots than %llu",
11462                                 key.objectid, nodesize, root->objectid);
11463                         err = BACKREF_MISMATCH;
11464                 }
11465         }
11466
11467         /*
11468          * Iterate the extent/metadata item to find the exact backref
11469          */
11470         item_size = btrfs_item_size_nr(leaf, slot);
11471         ptr = (unsigned long)iref;
11472         end = (unsigned long)ei + item_size;
11473
11474         while (ptr < end) {
11475                 iref = (struct btrfs_extent_inline_ref *)ptr;
11476                 type = btrfs_extent_inline_ref_type(leaf, iref);
11477                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11478
11479                 ret = check_extent_inline_ref(leaf, &key, iref);
11480                 if (ret) {
11481                         err |= ret;
11482                         break;
11483                 }
11484                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11485                         if (offset == root->objectid)
11486                                 found_ref = 1;
11487                         if (!strict && owner == offset)
11488                                 found_ref = 1;
11489                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11490                         /*
11491                          * Backref of tree reloc root points to itself, no need
11492                          * to check backref any more.
11493                          *
11494                          * This may be an error of loop backref, but extent tree
11495                          * checker should have already handled it.
11496                          * Here we only need to avoid infinite iteration.
11497                          */
11498                         if (offset == bytenr) {
11499                                 found_ref = 1;
11500                         } else {
11501                                 /*
11502                                  * Check if the backref points to valid
11503                                  * referencer
11504                                  */
11505                                 found_ref = !check_tree_block_ref( root, NULL,
11506                                                 offset, level + 1, owner,
11507                                                 NULL);
11508                         }
11509                 }
11510
11511                 if (found_ref)
11512                         break;
11513                 ptr += btrfs_extent_inline_ref_size(type);
11514         }
11515
11516         /*
11517          * Inlined extent item doesn't have what we need, check
11518          * TREE_BLOCK_REF_KEY
11519          */
11520         if (!found_ref) {
11521                 btrfs_release_path(&path);
11522                 key.objectid = bytenr;
11523                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11524                 key.offset = root->objectid;
11525
11526                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11527                 if (!ret)
11528                         found_ref = 1;
11529         }
11530         /*
11531          * Finally check SHARED BLOCK REF, any found will be good
11532          * Here we're not doing comprehensive extent backref checking,
11533          * only need to ensure there is some extent referring to this
11534          * tree block.
11535          */
11536         if (!found_ref) {
11537                 btrfs_release_path(&path);
11538                 key.objectid = bytenr;
11539                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11540                 key.offset = (u64)-1;
11541
11542                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11543                 if (ret < 0) {
11544                         err |= BACKREF_MISSING;
11545                         goto out;
11546                 }
11547                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11548                 if (ret) {
11549                         err |= BACKREF_MISSING;
11550                         goto out;
11551                 }
11552                 found_ref = 1;
11553         }
11554         if (!found_ref)
11555                 err |= BACKREF_MISSING;
11556 out:
11557         btrfs_release_path(&path);
11558         if (nrefs && strict &&
11559             level < root_level && nrefs->full_backref[level + 1])
11560                 parent = nrefs->bytenr[level + 1];
11561         if (eb && (err & BACKREF_MISSING))
11562                 error(
11563         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11564                       bytenr, nodesize, owner, level,
11565                       parent ? "parent" : "root",
11566                       parent ? parent : root->objectid);
11567         return err;
11568 }
11569
11570 /*
11571  * If @err contains BACKREF_MISSING then add extent of the
11572  * file_extent_data_item.
11573  *
11574  * Returns error bits after reapir.
11575  */
11576 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11577                                    struct btrfs_root *root,
11578                                    struct btrfs_path *pathp,
11579                                    struct node_refs *nrefs,
11580                                    int err)
11581 {
11582         struct btrfs_file_extent_item *fi;
11583         struct btrfs_key fi_key;
11584         struct btrfs_key key;
11585         struct btrfs_extent_item *ei;
11586         struct btrfs_path path;
11587         struct btrfs_root *extent_root = root->fs_info->extent_root;
11588         struct extent_buffer *eb;
11589         u64 size;
11590         u64 disk_bytenr;
11591         u64 num_bytes;
11592         u64 parent;
11593         u64 offset;
11594         u64 extent_offset;
11595         u64 file_offset;
11596         int generation;
11597         int slot;
11598         int ret = 0;
11599
11600         eb = pathp->nodes[0];
11601         slot = pathp->slots[0];
11602         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11603         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11604
11605         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11606             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11607                 return err;
11608
11609         file_offset = fi_key.offset;
11610         generation = btrfs_file_extent_generation(eb, fi);
11611         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11612         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11613         extent_offset = btrfs_file_extent_offset(eb, fi);
11614         offset = file_offset - extent_offset;
11615
11616         /* now repair only adds backref */
11617         if ((err & BACKREF_MISSING) == 0)
11618                 return err;
11619
11620         /* search extent item */
11621         key.objectid = disk_bytenr;
11622         key.type = BTRFS_EXTENT_ITEM_KEY;
11623         key.offset = num_bytes;
11624
11625         btrfs_init_path(&path);
11626         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11627         if (ret < 0) {
11628                 ret = -EIO;
11629                 goto out;
11630         }
11631
11632         /* insert an extent item */
11633         if (ret > 0) {
11634                 key.objectid = disk_bytenr;
11635                 key.type = BTRFS_EXTENT_ITEM_KEY;
11636                 key.offset = num_bytes;
11637                 size = sizeof(*ei);
11638
11639                 btrfs_release_path(&path);
11640                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11641                                               size);
11642                 if (ret)
11643                         goto out;
11644                 eb = path.nodes[0];
11645                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11646
11647                 btrfs_set_extent_refs(eb, ei, 0);
11648                 btrfs_set_extent_generation(eb, ei, generation);
11649                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11650
11651                 btrfs_mark_buffer_dirty(eb);
11652                 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11653                                                num_bytes, 1, 0);
11654                 btrfs_release_path(&path);
11655         }
11656
11657         if (nrefs->full_backref[0])
11658                 parent = btrfs_header_bytenr(eb);
11659         else
11660                 parent = 0;
11661
11662         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11663                                    root->objectid,
11664                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11665                                    offset);
11666         if (ret) {
11667                 error(
11668                 "failed to increase extent data backref[%llu %llu] root %llu",
11669                       disk_bytenr, num_bytes, root->objectid);
11670                 goto out;
11671         } else {
11672                 printf("Add one extent data backref [%llu %llu]\n",
11673                        disk_bytenr, num_bytes);
11674         }
11675
11676         err &= ~BACKREF_MISSING;
11677 out:
11678         if (ret)
11679                 error("can't repair root %llu extent data item[%llu %llu]",
11680                       root->objectid, disk_bytenr, num_bytes);
11681         return err;
11682 }
11683
11684 /*
11685  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11686  *
11687  * Return >0 any error found and output error message
11688  * Return 0 for no error found
11689  */
11690 static int check_extent_data_item(struct btrfs_root *root,
11691                                   struct btrfs_path *pathp,
11692                                   struct node_refs *nrefs,  int account_bytes)
11693 {
11694         struct btrfs_file_extent_item *fi;
11695         struct extent_buffer *eb = pathp->nodes[0];
11696         struct btrfs_path path;
11697         struct btrfs_root *extent_root = root->fs_info->extent_root;
11698         struct btrfs_key fi_key;
11699         struct btrfs_key dbref_key;
11700         struct extent_buffer *leaf;
11701         struct btrfs_extent_item *ei;
11702         struct btrfs_extent_inline_ref *iref;
11703         struct btrfs_extent_data_ref *dref;
11704         u64 owner;
11705         u64 disk_bytenr;
11706         u64 disk_num_bytes;
11707         u64 extent_num_bytes;
11708         u64 extent_flags;
11709         u64 offset;
11710         u32 item_size;
11711         unsigned long end;
11712         unsigned long ptr;
11713         int type;
11714         int found_dbackref = 0;
11715         int slot = pathp->slots[0];
11716         int err = 0;
11717         int ret;
11718         int strict;
11719
11720         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11721         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11722
11723         /* Nothing to check for hole and inline data extents */
11724         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11725             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11726                 return 0;
11727
11728         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11729         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11730         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11731         offset = btrfs_file_extent_offset(eb, fi);
11732
11733         /* Check unaligned disk_num_bytes and num_bytes */
11734         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11735                 error(
11736 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11737                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11738                         root->fs_info->sectorsize);
11739                 err |= BYTES_UNALIGNED;
11740         } else if (account_bytes) {
11741                 data_bytes_allocated += disk_num_bytes;
11742         }
11743         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11744                 error(
11745 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11746                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11747                         root->fs_info->sectorsize);
11748                 err |= BYTES_UNALIGNED;
11749         } else if (account_bytes) {
11750                 data_bytes_referenced += extent_num_bytes;
11751         }
11752         owner = btrfs_header_owner(eb);
11753
11754         /* Check the extent item of the file extent in extent tree */
11755         btrfs_init_path(&path);
11756         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11757         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11758         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11759
11760         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11761         if (ret)
11762                 goto out;
11763
11764         leaf = path.nodes[0];
11765         slot = path.slots[0];
11766         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11767
11768         extent_flags = btrfs_extent_flags(leaf, ei);
11769
11770         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11771                 error(
11772                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11773                     disk_bytenr, disk_num_bytes,
11774                     BTRFS_EXTENT_FLAG_DATA);
11775                 err |= BACKREF_MISMATCH;
11776         }
11777
11778         /* Check data backref inside that extent item */
11779         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11780         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11781         ptr = (unsigned long)iref;
11782         end = (unsigned long)ei + item_size;
11783         strict = should_check_extent_strictly(root, nrefs, -1);
11784
11785         while (ptr < end) {
11786                 u64 ref_root;
11787                 u64 ref_objectid;
11788                 u64 ref_offset;
11789                 bool match = false;
11790
11791                 iref = (struct btrfs_extent_inline_ref *)ptr;
11792                 type = btrfs_extent_inline_ref_type(leaf, iref);
11793                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11794
11795                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11796                 if (ret) {
11797                         err |= ret;
11798                         break;
11799                 }
11800                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11801                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11802                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11803                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
11804
11805                         if (ref_objectid == fi_key.objectid &&
11806                             ref_offset == fi_key.offset - offset)
11807                                 match = true;
11808                         if (ref_root == root->objectid && match)
11809                                 found_dbackref = 1;
11810                         else if (!strict && owner == ref_root && match)
11811                                 found_dbackref = 1;
11812                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11813                         found_dbackref = !check_tree_block_ref(root, NULL,
11814                                 btrfs_extent_inline_ref_offset(leaf, iref),
11815                                 0, owner, NULL);
11816                 }
11817
11818                 if (found_dbackref)
11819                         break;
11820                 ptr += btrfs_extent_inline_ref_size(type);
11821         }
11822
11823         if (!found_dbackref) {
11824                 btrfs_release_path(&path);
11825
11826                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11827                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11828                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11829                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11830                                 fi_key.objectid, fi_key.offset - offset);
11831
11832                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11833                                         &dbref_key, &path, 0, 0);
11834                 if (!ret) {
11835                         found_dbackref = 1;
11836                         goto out;
11837                 }
11838
11839                 btrfs_release_path(&path);
11840
11841                 /*
11842                  * Neither inlined nor EXTENT_DATA_REF found, try
11843                  * SHARED_DATA_REF as last chance.
11844                  */
11845                 dbref_key.objectid = disk_bytenr;
11846                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11847                 dbref_key.offset = eb->start;
11848
11849                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11850                                         &dbref_key, &path, 0, 0);
11851                 if (!ret) {
11852                         found_dbackref = 1;
11853                         goto out;
11854                 }
11855         }
11856
11857 out:
11858         if (!found_dbackref)
11859                 err |= BACKREF_MISSING;
11860         btrfs_release_path(&path);
11861         if (err & BACKREF_MISSING) {
11862                 error("data extent[%llu %llu] backref lost",
11863                       disk_bytenr, disk_num_bytes);
11864         }
11865         return err;
11866 }
11867
11868 /*
11869  * Get real tree block level for the case like shared block
11870  * Return >= 0 as tree level
11871  * Return <0 for error
11872  */
11873 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11874 {
11875         struct extent_buffer *eb;
11876         struct btrfs_path path;
11877         struct btrfs_key key;
11878         struct btrfs_extent_item *ei;
11879         u64 flags;
11880         u64 transid;
11881         u8 backref_level;
11882         u8 header_level;
11883         int ret;
11884
11885         /* Search extent tree for extent generation and level */
11886         key.objectid = bytenr;
11887         key.type = BTRFS_METADATA_ITEM_KEY;
11888         key.offset = (u64)-1;
11889
11890         btrfs_init_path(&path);
11891         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11892         if (ret < 0)
11893                 goto release_out;
11894         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11895         if (ret < 0)
11896                 goto release_out;
11897         if (ret > 0) {
11898                 ret = -ENOENT;
11899                 goto release_out;
11900         }
11901
11902         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11903         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11904                             struct btrfs_extent_item);
11905         flags = btrfs_extent_flags(path.nodes[0], ei);
11906         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11907                 ret = -ENOENT;
11908                 goto release_out;
11909         }
11910
11911         /* Get transid for later read_tree_block() check */
11912         transid = btrfs_extent_generation(path.nodes[0], ei);
11913
11914         /* Get backref level as one source */
11915         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11916                 backref_level = key.offset;
11917         } else {
11918                 struct btrfs_tree_block_info *info;
11919
11920                 info = (struct btrfs_tree_block_info *)(ei + 1);
11921                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11922         }
11923         btrfs_release_path(&path);
11924
11925         /* Get level from tree block as an alternative source */
11926         eb = read_tree_block(fs_info, bytenr, transid);
11927         if (!extent_buffer_uptodate(eb)) {
11928                 free_extent_buffer(eb);
11929                 return -EIO;
11930         }
11931         header_level = btrfs_header_level(eb);
11932         free_extent_buffer(eb);
11933
11934         if (header_level != backref_level)
11935                 return -EIO;
11936         return header_level;
11937
11938 release_out:
11939         btrfs_release_path(&path);
11940         return ret;
11941 }
11942
11943 /*
11944  * Check if a tree block backref is valid (points to a valid tree block)
11945  * if level == -1, level will be resolved
11946  * Return >0 for any error found and print error message
11947  */
11948 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
11949                                     u64 bytenr, int level)
11950 {
11951         struct btrfs_root *root;
11952         struct btrfs_key key;
11953         struct btrfs_path path;
11954         struct extent_buffer *eb;
11955         struct extent_buffer *node;
11956         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
11957         int err = 0;
11958         int ret;
11959
11960         /* Query level for level == -1 special case */
11961         if (level == -1)
11962                 level = query_tree_block_level(fs_info, bytenr);
11963         if (level < 0) {
11964                 err |= REFERENCER_MISSING;
11965                 goto out;
11966         }
11967
11968         key.objectid = root_id;
11969         key.type = BTRFS_ROOT_ITEM_KEY;
11970         key.offset = (u64)-1;
11971
11972         root = btrfs_read_fs_root(fs_info, &key);
11973         if (IS_ERR(root)) {
11974                 err |= REFERENCER_MISSING;
11975                 goto out;
11976         }
11977
11978         /* Read out the tree block to get item/node key */
11979         eb = read_tree_block(fs_info, bytenr, 0);
11980         if (!extent_buffer_uptodate(eb)) {
11981                 err |= REFERENCER_MISSING;
11982                 free_extent_buffer(eb);
11983                 goto out;
11984         }
11985
11986         /* Empty tree, no need to check key */
11987         if (!btrfs_header_nritems(eb) && !level) {
11988                 free_extent_buffer(eb);
11989                 goto out;
11990         }
11991
11992         if (level)
11993                 btrfs_node_key_to_cpu(eb, &key, 0);
11994         else
11995                 btrfs_item_key_to_cpu(eb, &key, 0);
11996
11997         free_extent_buffer(eb);
11998
11999         btrfs_init_path(&path);
12000         path.lowest_level = level;
12001         /* Search with the first key, to ensure we can reach it */
12002         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12003         if (ret < 0) {
12004                 err |= REFERENCER_MISSING;
12005                 goto release_out;
12006         }
12007
12008         node = path.nodes[level];
12009         if (btrfs_header_bytenr(node) != bytenr) {
12010                 error(
12011         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12012                         bytenr, nodesize, bytenr,
12013                         btrfs_header_bytenr(node));
12014                 err |= REFERENCER_MISMATCH;
12015         }
12016         if (btrfs_header_level(node) != level) {
12017                 error(
12018         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12019                         bytenr, nodesize, level,
12020                         btrfs_header_level(node));
12021                 err |= REFERENCER_MISMATCH;
12022         }
12023
12024 release_out:
12025         btrfs_release_path(&path);
12026 out:
12027         if (err & REFERENCER_MISSING) {
12028                 if (level < 0)
12029                         error("extent [%llu %d] lost referencer (owner: %llu)",
12030                                 bytenr, nodesize, root_id);
12031                 else
12032                         error(
12033                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12034                                 bytenr, nodesize, root_id, level);
12035         }
12036
12037         return err;
12038 }
12039
12040 /*
12041  * Check if tree block @eb is tree reloc root.
12042  * Return 0 if it's not or any problem happens
12043  * Return 1 if it's a tree reloc root
12044  */
12045 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12046                                  struct extent_buffer *eb)
12047 {
12048         struct btrfs_root *tree_reloc_root;
12049         struct btrfs_key key;
12050         u64 bytenr = btrfs_header_bytenr(eb);
12051         u64 owner = btrfs_header_owner(eb);
12052         int ret = 0;
12053
12054         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12055         key.offset = owner;
12056         key.type = BTRFS_ROOT_ITEM_KEY;
12057
12058         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12059         if (IS_ERR(tree_reloc_root))
12060                 return 0;
12061
12062         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12063                 ret = 1;
12064         btrfs_free_fs_root(tree_reloc_root);
12065         return ret;
12066 }
12067
12068 /*
12069  * Check referencer for shared block backref
12070  * If level == -1, this function will resolve the level.
12071  */
12072 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12073                                      u64 parent, u64 bytenr, int level)
12074 {
12075         struct extent_buffer *eb;
12076         u32 nr;
12077         int found_parent = 0;
12078         int i;
12079
12080         eb = read_tree_block(fs_info, parent, 0);
12081         if (!extent_buffer_uptodate(eb))
12082                 goto out;
12083
12084         if (level == -1)
12085                 level = query_tree_block_level(fs_info, bytenr);
12086         if (level < 0)
12087                 goto out;
12088
12089         /* It's possible it's a tree reloc root */
12090         if (parent == bytenr) {
12091                 if (is_tree_reloc_root(fs_info, eb))
12092                         found_parent = 1;
12093                 goto out;
12094         }
12095
12096         if (level + 1 != btrfs_header_level(eb))
12097                 goto out;
12098
12099         nr = btrfs_header_nritems(eb);
12100         for (i = 0; i < nr; i++) {
12101                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12102                         found_parent = 1;
12103                         break;
12104                 }
12105         }
12106 out:
12107         free_extent_buffer(eb);
12108         if (!found_parent) {
12109                 error(
12110         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12111                         bytenr, fs_info->nodesize, parent, level);
12112                 return REFERENCER_MISSING;
12113         }
12114         return 0;
12115 }
12116
12117 /*
12118  * Check referencer for normal (inlined) data ref
12119  * If len == 0, it will be resolved by searching in extent tree
12120  */
12121 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12122                                      u64 root_id, u64 objectid, u64 offset,
12123                                      u64 bytenr, u64 len, u32 count)
12124 {
12125         struct btrfs_root *root;
12126         struct btrfs_root *extent_root = fs_info->extent_root;
12127         struct btrfs_key key;
12128         struct btrfs_path path;
12129         struct extent_buffer *leaf;
12130         struct btrfs_file_extent_item *fi;
12131         u32 found_count = 0;
12132         int slot;
12133         int ret = 0;
12134
12135         if (!len) {
12136                 key.objectid = bytenr;
12137                 key.type = BTRFS_EXTENT_ITEM_KEY;
12138                 key.offset = (u64)-1;
12139
12140                 btrfs_init_path(&path);
12141                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12142                 if (ret < 0)
12143                         goto out;
12144                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12145                 if (ret)
12146                         goto out;
12147                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12148                 if (key.objectid != bytenr ||
12149                     key.type != BTRFS_EXTENT_ITEM_KEY)
12150                         goto out;
12151                 len = key.offset;
12152                 btrfs_release_path(&path);
12153         }
12154         key.objectid = root_id;
12155         key.type = BTRFS_ROOT_ITEM_KEY;
12156         key.offset = (u64)-1;
12157         btrfs_init_path(&path);
12158
12159         root = btrfs_read_fs_root(fs_info, &key);
12160         if (IS_ERR(root))
12161                 goto out;
12162
12163         key.objectid = objectid;
12164         key.type = BTRFS_EXTENT_DATA_KEY;
12165         /*
12166          * It can be nasty as data backref offset is
12167          * file offset - file extent offset, which is smaller or
12168          * equal to original backref offset.  The only special case is
12169          * overflow.  So we need to special check and do further search.
12170          */
12171         key.offset = offset & (1ULL << 63) ? 0 : offset;
12172
12173         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12174         if (ret < 0)
12175                 goto out;
12176
12177         /*
12178          * Search afterwards to get correct one
12179          * NOTE: As we must do a comprehensive check on the data backref to
12180          * make sure the dref count also matches, we must iterate all file
12181          * extents for that inode.
12182          */
12183         while (1) {
12184                 leaf = path.nodes[0];
12185                 slot = path.slots[0];
12186
12187                 if (slot >= btrfs_header_nritems(leaf) ||
12188                     btrfs_header_owner(leaf) != root_id)
12189                         goto next;
12190                 btrfs_item_key_to_cpu(leaf, &key, slot);
12191                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12192                         break;
12193                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12194                 /*
12195                  * Except normal disk bytenr and disk num bytes, we still
12196                  * need to do extra check on dbackref offset as
12197                  * dbackref offset = file_offset - file_extent_offset
12198                  *
12199                  * Also, we must check the leaf owner.
12200                  * In case of shared tree blocks (snapshots) we can inherit
12201                  * leaves from source snapshot.
12202                  * In that case, reference from source snapshot should not
12203                  * count.
12204                  */
12205                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12206                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12207                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12208                     offset && btrfs_header_owner(leaf) == root_id)
12209                         found_count++;
12210
12211 next:
12212                 ret = btrfs_next_item(root, &path);
12213                 if (ret)
12214                         break;
12215         }
12216 out:
12217         btrfs_release_path(&path);
12218         if (found_count != count) {
12219                 error(
12220 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12221                         bytenr, len, root_id, objectid, offset, count, found_count);
12222                 return REFERENCER_MISSING;
12223         }
12224         return 0;
12225 }
12226
12227 /*
12228  * Check if the referencer of a shared data backref exists
12229  */
12230 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12231                                      u64 parent, u64 bytenr)
12232 {
12233         struct extent_buffer *eb;
12234         struct btrfs_key key;
12235         struct btrfs_file_extent_item *fi;
12236         u32 nr;
12237         int found_parent = 0;
12238         int i;
12239
12240         eb = read_tree_block(fs_info, parent, 0);
12241         if (!extent_buffer_uptodate(eb))
12242                 goto out;
12243
12244         nr = btrfs_header_nritems(eb);
12245         for (i = 0; i < nr; i++) {
12246                 btrfs_item_key_to_cpu(eb, &key, i);
12247                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12248                         continue;
12249
12250                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12251                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12252                         continue;
12253
12254                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12255                         found_parent = 1;
12256                         break;
12257                 }
12258         }
12259
12260 out:
12261         free_extent_buffer(eb);
12262         if (!found_parent) {
12263                 error("shared extent %llu referencer lost (parent: %llu)",
12264                         bytenr, parent);
12265                 return REFERENCER_MISSING;
12266         }
12267         return 0;
12268 }
12269
12270 /*
12271  * Only delete backref if REFERENCER_MISSING now
12272  *
12273  * Returns <0   the extent was deleted
12274  * Returns >0   the backref was deleted but extent still exists, returned value
12275  *               means error after repair
12276  * Returns  0   nothing happened
12277  */
12278 static int repair_extent_item(struct btrfs_trans_handle *trans,
12279                       struct btrfs_root *root, struct btrfs_path *path,
12280                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12281                       u64 owner, u64 offset, int err)
12282 {
12283         struct btrfs_key old_key;
12284         int freed = 0;
12285         int ret;
12286
12287         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12288
12289         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12290                 /* delete the backref */
12291                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12292                           num_bytes, parent, root_objectid, owner, offset);
12293                 if (!ret) {
12294                         freed = 1;
12295                         err &= ~REFERENCER_MISSING;
12296                         printf("Delete backref in extent [%llu %llu]\n",
12297                                bytenr, num_bytes);
12298                 } else {
12299                         error("fail to delete backref in extent [%llu %llu]",
12300                                bytenr, num_bytes);
12301                 }
12302         }
12303
12304         /* btrfs_free_extent may delete the extent */
12305         btrfs_release_path(path);
12306         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12307
12308         if (ret)
12309                 ret = -ENOENT;
12310         else if (freed)
12311                 ret = err;
12312         return ret;
12313 }
12314
12315 /*
12316  * This function will check a given extent item, including its backref and
12317  * itself (like crossing stripe boundary and type)
12318  *
12319  * Since we don't use extent_record anymore, introduce new error bit
12320  */
12321 static int check_extent_item(struct btrfs_trans_handle *trans,
12322                              struct btrfs_fs_info *fs_info,
12323                              struct btrfs_path *path)
12324 {
12325         struct btrfs_extent_item *ei;
12326         struct btrfs_extent_inline_ref *iref;
12327         struct btrfs_extent_data_ref *dref;
12328         struct extent_buffer *eb = path->nodes[0];
12329         unsigned long end;
12330         unsigned long ptr;
12331         int slot = path->slots[0];
12332         int type;
12333         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12334         u32 item_size = btrfs_item_size_nr(eb, slot);
12335         u64 flags;
12336         u64 offset;
12337         u64 parent;
12338         u64 num_bytes;
12339         u64 root_objectid;
12340         u64 owner;
12341         u64 owner_offset;
12342         int metadata = 0;
12343         int level;
12344         struct btrfs_key key;
12345         int ret;
12346         int err = 0;
12347
12348         btrfs_item_key_to_cpu(eb, &key, slot);
12349         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12350                 bytes_used += key.offset;
12351                 num_bytes = key.offset;
12352         } else {
12353                 bytes_used += nodesize;
12354                 num_bytes = nodesize;
12355         }
12356
12357         if (item_size < sizeof(*ei)) {
12358                 /*
12359                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12360                  * old thing when on disk format is still un-determined.
12361                  * No need to care about it anymore
12362                  */
12363                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12364                 return -ENOTTY;
12365         }
12366
12367         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12368         flags = btrfs_extent_flags(eb, ei);
12369
12370         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12371                 metadata = 1;
12372         if (metadata && check_crossing_stripes(global_info, key.objectid,
12373                                                eb->len)) {
12374                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12375                       key.objectid, key.objectid + nodesize);
12376                 err |= CROSSING_STRIPE_BOUNDARY;
12377         }
12378
12379         ptr = (unsigned long)(ei + 1);
12380
12381         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12382                 /* Old EXTENT_ITEM metadata */
12383                 struct btrfs_tree_block_info *info;
12384
12385                 info = (struct btrfs_tree_block_info *)ptr;
12386                 level = btrfs_tree_block_level(eb, info);
12387                 ptr += sizeof(struct btrfs_tree_block_info);
12388         } else {
12389                 /* New METADATA_ITEM */
12390                 level = key.offset;
12391         }
12392         end = (unsigned long)ei + item_size;
12393
12394 next:
12395         /* Reached extent item end normally */
12396         if (ptr == end)
12397                 goto out;
12398
12399         /* Beyond extent item end, wrong item size */
12400         if (ptr > end) {
12401                 err |= ITEM_SIZE_MISMATCH;
12402                 error("extent item at bytenr %llu slot %d has wrong size",
12403                         eb->start, slot);
12404                 goto out;
12405         }
12406
12407         parent = 0;
12408         root_objectid = 0;
12409         owner = 0;
12410         owner_offset = 0;
12411         /* Now check every backref in this extent item */
12412         iref = (struct btrfs_extent_inline_ref *)ptr;
12413         type = btrfs_extent_inline_ref_type(eb, iref);
12414         offset = btrfs_extent_inline_ref_offset(eb, iref);
12415         switch (type) {
12416         case BTRFS_TREE_BLOCK_REF_KEY:
12417                 root_objectid = offset;
12418                 owner = level;
12419                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12420                                                level);
12421                 err |= ret;
12422                 break;
12423         case BTRFS_SHARED_BLOCK_REF_KEY:
12424                 parent = offset;
12425                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12426                                                  level);
12427                 err |= ret;
12428                 break;
12429         case BTRFS_EXTENT_DATA_REF_KEY:
12430                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12431                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12432                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12433                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12434                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12435                                         owner_offset, key.objectid, key.offset,
12436                                         btrfs_extent_data_ref_count(eb, dref));
12437                 err |= ret;
12438                 break;
12439         case BTRFS_SHARED_DATA_REF_KEY:
12440                 parent = offset;
12441                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12442                 err |= ret;
12443                 break;
12444         default:
12445                 error("extent[%llu %d %llu] has unknown ref type: %d",
12446                         key.objectid, key.type, key.offset, type);
12447                 ret = UNKNOWN_TYPE;
12448                 err |= ret;
12449                 goto out;
12450         }
12451
12452         if (err && repair) {
12453                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12454                          key.objectid, num_bytes, parent, root_objectid,
12455                          owner, owner_offset, ret);
12456                 if (ret < 0)
12457                         goto out;
12458                 if (ret) {
12459                         goto next;
12460                         err = ret;
12461                 }
12462         }
12463
12464         ptr += btrfs_extent_inline_ref_size(type);
12465         goto next;
12466
12467 out:
12468         return err;
12469 }
12470
12471 /*
12472  * Check if a dev extent item is referred correctly by its chunk
12473  */
12474 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12475                                  struct extent_buffer *eb, int slot)
12476 {
12477         struct btrfs_root *chunk_root = fs_info->chunk_root;
12478         struct btrfs_dev_extent *ptr;
12479         struct btrfs_path path;
12480         struct btrfs_key chunk_key;
12481         struct btrfs_key devext_key;
12482         struct btrfs_chunk *chunk;
12483         struct extent_buffer *l;
12484         int num_stripes;
12485         u64 length;
12486         int i;
12487         int found_chunk = 0;
12488         int ret;
12489
12490         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12491         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12492         length = btrfs_dev_extent_length(eb, ptr);
12493
12494         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12495         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12496         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12497
12498         btrfs_init_path(&path);
12499         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12500         if (ret)
12501                 goto out;
12502
12503         l = path.nodes[0];
12504         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12505         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12506                                       chunk_key.offset);
12507         if (ret < 0)
12508                 goto out;
12509
12510         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12511                 goto out;
12512
12513         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12514         for (i = 0; i < num_stripes; i++) {
12515                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12516                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12517
12518                 if (devid == devext_key.objectid &&
12519                     offset == devext_key.offset) {
12520                         found_chunk = 1;
12521                         break;
12522                 }
12523         }
12524 out:
12525         btrfs_release_path(&path);
12526         if (!found_chunk) {
12527                 error(
12528                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12529                         devext_key.objectid, devext_key.offset, length);
12530                 return REFERENCER_MISSING;
12531         }
12532         return 0;
12533 }
12534
12535 /*
12536  * Check if the used space is correct with the dev item
12537  */
12538 static int check_dev_item(struct btrfs_fs_info *fs_info,
12539                           struct extent_buffer *eb, int slot)
12540 {
12541         struct btrfs_root *dev_root = fs_info->dev_root;
12542         struct btrfs_dev_item *dev_item;
12543         struct btrfs_path path;
12544         struct btrfs_key key;
12545         struct btrfs_dev_extent *ptr;
12546         u64 total_bytes;
12547         u64 dev_id;
12548         u64 used;
12549         u64 total = 0;
12550         int ret;
12551
12552         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12553         dev_id = btrfs_device_id(eb, dev_item);
12554         used = btrfs_device_bytes_used(eb, dev_item);
12555         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12556
12557         key.objectid = dev_id;
12558         key.type = BTRFS_DEV_EXTENT_KEY;
12559         key.offset = 0;
12560
12561         btrfs_init_path(&path);
12562         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12563         if (ret < 0) {
12564                 btrfs_item_key_to_cpu(eb, &key, slot);
12565                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12566                         key.objectid, key.type, key.offset);
12567                 btrfs_release_path(&path);
12568                 return REFERENCER_MISSING;
12569         }
12570
12571         /* Iterate dev_extents to calculate the used space of a device */
12572         while (1) {
12573                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12574                         goto next;
12575
12576                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12577                 if (key.objectid > dev_id)
12578                         break;
12579                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12580                         goto next;
12581
12582                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12583                                      struct btrfs_dev_extent);
12584                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12585 next:
12586                 ret = btrfs_next_item(dev_root, &path);
12587                 if (ret)
12588                         break;
12589         }
12590         btrfs_release_path(&path);
12591
12592         if (used != total) {
12593                 btrfs_item_key_to_cpu(eb, &key, slot);
12594                 error(
12595 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12596                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12597                         BTRFS_DEV_EXTENT_KEY, dev_id);
12598                 return ACCOUNTING_MISMATCH;
12599         }
12600         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12601
12602         return 0;
12603 }
12604
12605 /*
12606  * Check a block group item with its referener (chunk) and its used space
12607  * with extent/metadata item
12608  */
12609 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12610                                   struct extent_buffer *eb, int slot)
12611 {
12612         struct btrfs_root *extent_root = fs_info->extent_root;
12613         struct btrfs_root *chunk_root = fs_info->chunk_root;
12614         struct btrfs_block_group_item *bi;
12615         struct btrfs_block_group_item bg_item;
12616         struct btrfs_path path;
12617         struct btrfs_key bg_key;
12618         struct btrfs_key chunk_key;
12619         struct btrfs_key extent_key;
12620         struct btrfs_chunk *chunk;
12621         struct extent_buffer *leaf;
12622         struct btrfs_extent_item *ei;
12623         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12624         u64 flags;
12625         u64 bg_flags;
12626         u64 used;
12627         u64 total = 0;
12628         int ret;
12629         int err = 0;
12630
12631         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12632         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12633         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12634         used = btrfs_block_group_used(&bg_item);
12635         bg_flags = btrfs_block_group_flags(&bg_item);
12636
12637         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12638         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12639         chunk_key.offset = bg_key.objectid;
12640
12641         btrfs_init_path(&path);
12642         /* Search for the referencer chunk */
12643         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12644         if (ret) {
12645                 error(
12646                 "block group[%llu %llu] did not find the related chunk item",
12647                         bg_key.objectid, bg_key.offset);
12648                 err |= REFERENCER_MISSING;
12649         } else {
12650                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12651                                         struct btrfs_chunk);
12652                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12653                                                 bg_key.offset) {
12654                         error(
12655         "block group[%llu %llu] related chunk item length does not match",
12656                                 bg_key.objectid, bg_key.offset);
12657                         err |= REFERENCER_MISMATCH;
12658                 }
12659         }
12660         btrfs_release_path(&path);
12661
12662         /* Search from the block group bytenr */
12663         extent_key.objectid = bg_key.objectid;
12664         extent_key.type = 0;
12665         extent_key.offset = 0;
12666
12667         btrfs_init_path(&path);
12668         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12669         if (ret < 0)
12670                 goto out;
12671
12672         /* Iterate extent tree to account used space */
12673         while (1) {
12674                 leaf = path.nodes[0];
12675
12676                 /* Search slot can point to the last item beyond leaf nritems */
12677                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12678                         goto next;
12679
12680                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12681                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12682                         break;
12683
12684                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12685                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12686                         goto next;
12687                 if (extent_key.objectid < bg_key.objectid)
12688                         goto next;
12689
12690                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12691                         total += nodesize;
12692                 else
12693                         total += extent_key.offset;
12694
12695                 ei = btrfs_item_ptr(leaf, path.slots[0],
12696                                     struct btrfs_extent_item);
12697                 flags = btrfs_extent_flags(leaf, ei);
12698                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12699                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12700                                 error(
12701                         "bad extent[%llu, %llu) type mismatch with chunk",
12702                                         extent_key.objectid,
12703                                         extent_key.objectid + extent_key.offset);
12704                                 err |= CHUNK_TYPE_MISMATCH;
12705                         }
12706                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12707                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12708                                     BTRFS_BLOCK_GROUP_METADATA))) {
12709                                 error(
12710                         "bad extent[%llu, %llu) type mismatch with chunk",
12711                                         extent_key.objectid,
12712                                         extent_key.objectid + nodesize);
12713                                 err |= CHUNK_TYPE_MISMATCH;
12714                         }
12715                 }
12716 next:
12717                 ret = btrfs_next_item(extent_root, &path);
12718                 if (ret)
12719                         break;
12720         }
12721
12722 out:
12723         btrfs_release_path(&path);
12724
12725         if (total != used) {
12726                 error(
12727                 "block group[%llu %llu] used %llu but extent items used %llu",
12728                         bg_key.objectid, bg_key.offset, used, total);
12729                 err |= BG_ACCOUNTING_ERROR;
12730         }
12731         return err;
12732 }
12733
12734 /*
12735  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12736  * FIXME: We still need to repair error of dev_item.
12737  *
12738  * Returns error after repair.
12739  */
12740 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12741                              struct btrfs_root *chunk_root,
12742                              struct btrfs_path *path, int err)
12743 {
12744         struct btrfs_chunk *chunk;
12745         struct btrfs_key chunk_key;
12746         struct extent_buffer *eb = path->nodes[0];
12747         u64 length;
12748         int slot = path->slots[0];
12749         u64 type;
12750         int ret = 0;
12751
12752         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12753         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12754                 return err;
12755         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12756         type = btrfs_chunk_type(path->nodes[0], chunk);
12757         length = btrfs_chunk_length(eb, chunk);
12758
12759         if (err & REFERENCER_MISSING) {
12760                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12761                                              type, chunk_key.offset, length);
12762                 if (ret) {
12763                         error("fail to add block group item[%llu %llu]",
12764                               chunk_key.offset, length);
12765                         goto out;
12766                 } else {
12767                         err &= ~REFERENCER_MISSING;
12768                         printf("Added block group item[%llu %llu]\n",
12769                                chunk_key.offset, length);
12770                 }
12771         }
12772
12773 out:
12774         return err;
12775 }
12776
12777 /*
12778  * Check a chunk item.
12779  * Including checking all referred dev_extents and block group
12780  */
12781 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12782                             struct extent_buffer *eb, int slot)
12783 {
12784         struct btrfs_root *extent_root = fs_info->extent_root;
12785         struct btrfs_root *dev_root = fs_info->dev_root;
12786         struct btrfs_path path;
12787         struct btrfs_key chunk_key;
12788         struct btrfs_key bg_key;
12789         struct btrfs_key devext_key;
12790         struct btrfs_chunk *chunk;
12791         struct extent_buffer *leaf;
12792         struct btrfs_block_group_item *bi;
12793         struct btrfs_block_group_item bg_item;
12794         struct btrfs_dev_extent *ptr;
12795         u64 length;
12796         u64 chunk_end;
12797         u64 stripe_len;
12798         u64 type;
12799         int num_stripes;
12800         u64 offset;
12801         u64 objectid;
12802         int i;
12803         int ret;
12804         int err = 0;
12805
12806         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12807         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12808         length = btrfs_chunk_length(eb, chunk);
12809         chunk_end = chunk_key.offset + length;
12810         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12811                                       chunk_key.offset);
12812         if (ret < 0) {
12813                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12814                         chunk_end);
12815                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12816                 goto out;
12817         }
12818         type = btrfs_chunk_type(eb, chunk);
12819
12820         bg_key.objectid = chunk_key.offset;
12821         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12822         bg_key.offset = length;
12823
12824         btrfs_init_path(&path);
12825         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12826         if (ret) {
12827                 error(
12828                 "chunk[%llu %llu) did not find the related block group item",
12829                         chunk_key.offset, chunk_end);
12830                 err |= REFERENCER_MISSING;
12831         } else{
12832                 leaf = path.nodes[0];
12833                 bi = btrfs_item_ptr(leaf, path.slots[0],
12834                                     struct btrfs_block_group_item);
12835                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12836                                    sizeof(bg_item));
12837                 if (btrfs_block_group_flags(&bg_item) != type) {
12838                         error(
12839 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12840                                 chunk_key.offset, chunk_end, type,
12841                                 btrfs_block_group_flags(&bg_item));
12842                         err |= REFERENCER_MISSING;
12843                 }
12844         }
12845
12846         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12847         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12848         for (i = 0; i < num_stripes; i++) {
12849                 btrfs_release_path(&path);
12850                 btrfs_init_path(&path);
12851                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12852                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12853                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12854
12855                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12856                                         0, 0);
12857                 if (ret)
12858                         goto not_match_dev;
12859
12860                 leaf = path.nodes[0];
12861                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12862                                      struct btrfs_dev_extent);
12863                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12864                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12865                 if (objectid != chunk_key.objectid ||
12866                     offset != chunk_key.offset ||
12867                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12868                         goto not_match_dev;
12869                 continue;
12870 not_match_dev:
12871                 err |= BACKREF_MISSING;
12872                 error(
12873                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12874                         chunk_key.objectid, chunk_end, i);
12875                 continue;
12876         }
12877         btrfs_release_path(&path);
12878 out:
12879         return err;
12880 }
12881
12882 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12883                                    struct btrfs_root *root,
12884                                    struct btrfs_path *path)
12885 {
12886         struct btrfs_key key;
12887         int ret = 0;
12888
12889         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12890         btrfs_release_path(path);
12891         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12892         if (ret) {
12893                 ret = -ENOENT;
12894                 goto out;
12895         }
12896
12897         ret = btrfs_del_item(trans, root, path);
12898         if (ret)
12899                 goto out;
12900
12901         if (path->slots[0] == 0)
12902                 btrfs_prev_leaf(root, path);
12903         else
12904                 path->slots[0]--;
12905 out:
12906         if (ret)
12907                 error("failed to delete root %llu item[%llu, %u, %llu]",
12908                       root->objectid, key.objectid, key.type, key.offset);
12909         else
12910                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12911                        root->objectid, key.objectid, key.type, key.offset);
12912         return ret;
12913 }
12914
12915 /*
12916  * Main entry function to check known items and update related accounting info
12917  */
12918 static int check_leaf_items(struct btrfs_trans_handle *trans,
12919                             struct btrfs_root *root, struct btrfs_path *path,
12920                             struct node_refs *nrefs, int account_bytes)
12921 {
12922         struct btrfs_fs_info *fs_info = root->fs_info;
12923         struct btrfs_key key;
12924         struct extent_buffer *eb;
12925         int slot;
12926         int type;
12927         struct btrfs_extent_data_ref *dref;
12928         int ret = 0;
12929         int err = 0;
12930
12931 again:
12932         eb = path->nodes[0];
12933         slot = path->slots[0];
12934         if (slot >= btrfs_header_nritems(eb)) {
12935                 if (slot == 0) {
12936                         error("empty leaf [%llu %u] root %llu", eb->start,
12937                                 root->fs_info->nodesize, root->objectid);
12938                         err |= EIO;
12939                 }
12940                 goto out;
12941         }
12942
12943         btrfs_item_key_to_cpu(eb, &key, slot);
12944         type = key.type;
12945
12946         switch (type) {
12947         case BTRFS_EXTENT_DATA_KEY:
12948                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
12949                 if (repair && ret)
12950                         ret = repair_extent_data_item(trans, root, path, nrefs,
12951                                                       ret);
12952                 err |= ret;
12953                 break;
12954         case BTRFS_BLOCK_GROUP_ITEM_KEY:
12955                 ret = check_block_group_item(fs_info, eb, slot);
12956                 if (repair &&
12957                     ret & REFERENCER_MISSING)
12958                         ret = delete_extent_tree_item(trans, root, path);
12959                 err |= ret;
12960                 break;
12961         case BTRFS_DEV_ITEM_KEY:
12962                 ret = check_dev_item(fs_info, eb, slot);
12963                 err |= ret;
12964                 break;
12965         case BTRFS_CHUNK_ITEM_KEY:
12966                 ret = check_chunk_item(fs_info, eb, slot);
12967                 if (repair && ret)
12968                         ret = repair_chunk_item(trans, root, path, ret);
12969                 err |= ret;
12970                 break;
12971         case BTRFS_DEV_EXTENT_KEY:
12972                 ret = check_dev_extent_item(fs_info, eb, slot);
12973                 err |= ret;
12974                 break;
12975         case BTRFS_EXTENT_ITEM_KEY:
12976         case BTRFS_METADATA_ITEM_KEY:
12977                 ret = check_extent_item(trans, fs_info, path);
12978                 err |= ret;
12979                 break;
12980         case BTRFS_EXTENT_CSUM_KEY:
12981                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
12982                 err |= ret;
12983                 break;
12984         case BTRFS_TREE_BLOCK_REF_KEY:
12985                 ret = check_tree_block_backref(fs_info, key.offset,
12986                                                key.objectid, -1);
12987                 if (repair &&
12988                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
12989                         ret = delete_extent_tree_item(trans, root, path);
12990                 err |= ret;
12991                 break;
12992         case BTRFS_EXTENT_DATA_REF_KEY:
12993                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
12994                 ret = check_extent_data_backref(fs_info,
12995                                 btrfs_extent_data_ref_root(eb, dref),
12996                                 btrfs_extent_data_ref_objectid(eb, dref),
12997                                 btrfs_extent_data_ref_offset(eb, dref),
12998                                 key.objectid, 0,
12999                                 btrfs_extent_data_ref_count(eb, dref));
13000                 if (repair &&
13001                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13002                         ret = delete_extent_tree_item(trans, root, path);
13003                 err |= ret;
13004                 break;
13005         case BTRFS_SHARED_BLOCK_REF_KEY:
13006                 ret = check_shared_block_backref(fs_info, key.offset,
13007                                                  key.objectid, -1);
13008                 if (repair &&
13009                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13010                         ret = delete_extent_tree_item(trans, root, path);
13011                 err |= ret;
13012                 break;
13013         case BTRFS_SHARED_DATA_REF_KEY:
13014                 ret = check_shared_data_backref(fs_info, key.offset,
13015                                                 key.objectid);
13016                 if (repair &&
13017                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13018                         ret = delete_extent_tree_item(trans, root, path);
13019                 err |= ret;
13020                 break;
13021         default:
13022                 break;
13023         }
13024
13025         ++path->slots[0];
13026         goto again;
13027 out:
13028         return err;
13029 }
13030
13031 /*
13032  * Low memory usage version check_chunks_and_extents.
13033  */
13034 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13035 {
13036         struct btrfs_trans_handle *trans = NULL;
13037         struct btrfs_path path;
13038         struct btrfs_key old_key;
13039         struct btrfs_key key;
13040         struct btrfs_root *root1;
13041         struct btrfs_root *root;
13042         struct btrfs_root *cur_root;
13043         int err = 0;
13044         int ret;
13045
13046         root = fs_info->fs_root;
13047
13048         if (repair) {
13049                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13050                 if (IS_ERR(trans)) {
13051                         error("failed to start transaction before check");
13052                         return PTR_ERR(trans);
13053                 }
13054         }
13055
13056         root1 = root->fs_info->chunk_root;
13057         ret = check_btrfs_root(trans, root1, 0, 1);
13058         err |= ret;
13059
13060         root1 = root->fs_info->tree_root;
13061         ret = check_btrfs_root(trans, root1, 0, 1);
13062         err |= ret;
13063
13064         btrfs_init_path(&path);
13065         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13066         key.offset = 0;
13067         key.type = BTRFS_ROOT_ITEM_KEY;
13068
13069         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13070         if (ret) {
13071                 error("cannot find extent tree in tree_root");
13072                 goto out;
13073         }
13074
13075         while (1) {
13076                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13077                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13078                         goto next;
13079                 old_key = key;
13080                 key.offset = (u64)-1;
13081
13082                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13083                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13084                                         &key);
13085                 else
13086                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
13087                 if (IS_ERR(cur_root) || !cur_root) {
13088                         error("failed to read tree: %lld", key.objectid);
13089                         goto next;
13090                 }
13091
13092                 ret = check_btrfs_root(trans, cur_root, 0, 1);
13093                 err |= ret;
13094
13095                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13096                         btrfs_free_fs_root(cur_root);
13097
13098                 btrfs_release_path(&path);
13099                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13100                                         &old_key, &path, 0, 0);
13101                 if (ret)
13102                         goto out;
13103 next:
13104                 ret = btrfs_next_item(root1, &path);
13105                 if (ret)
13106                         goto out;
13107         }
13108 out:
13109
13110         /* if repair, update block accounting */
13111         if (repair) {
13112                 ret = btrfs_fix_block_accounting(trans, root);
13113                 if (ret)
13114                         err |= ret;
13115                 else
13116                         err &= ~BG_ACCOUNTING_ERROR;
13117         }
13118
13119         if (trans)
13120                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13121
13122         btrfs_release_path(&path);
13123
13124         return err;
13125 }
13126
13127 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13128 {
13129         int ret;
13130
13131         if (!ctx.progress_enabled)
13132                 fprintf(stderr, "checking extents\n");
13133         if (check_mode == CHECK_MODE_LOWMEM)
13134                 ret = check_chunks_and_extents_v2(fs_info);
13135         else
13136                 ret = check_chunks_and_extents(fs_info);
13137
13138         /* Also repair device size related problems */
13139         if (repair && !ret) {
13140                 ret = btrfs_fix_device_and_super_size(fs_info);
13141                 if (ret > 0)
13142                         ret = 0;
13143         }
13144         return ret;
13145 }
13146
13147 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13148                            struct btrfs_root *root, int overwrite)
13149 {
13150         struct extent_buffer *c;
13151         struct extent_buffer *old = root->node;
13152         int level;
13153         int ret;
13154         struct btrfs_disk_key disk_key = {0,0,0};
13155
13156         level = 0;
13157
13158         if (overwrite) {
13159                 c = old;
13160                 extent_buffer_get(c);
13161                 goto init;
13162         }
13163         c = btrfs_alloc_free_block(trans, root,
13164                                    root->fs_info->nodesize,
13165                                    root->root_key.objectid,
13166                                    &disk_key, level, 0, 0);
13167         if (IS_ERR(c)) {
13168                 c = old;
13169                 extent_buffer_get(c);
13170                 overwrite = 1;
13171         }
13172 init:
13173         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13174         btrfs_set_header_level(c, level);
13175         btrfs_set_header_bytenr(c, c->start);
13176         btrfs_set_header_generation(c, trans->transid);
13177         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13178         btrfs_set_header_owner(c, root->root_key.objectid);
13179
13180         write_extent_buffer(c, root->fs_info->fsid,
13181                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13182
13183         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13184                             btrfs_header_chunk_tree_uuid(c),
13185                             BTRFS_UUID_SIZE);
13186
13187         btrfs_mark_buffer_dirty(c);
13188         /*
13189          * this case can happen in the following case:
13190          *
13191          * 1.overwrite previous root.
13192          *
13193          * 2.reinit reloc data root, this is because we skip pin
13194          * down reloc data tree before which means we can allocate
13195          * same block bytenr here.
13196          */
13197         if (old->start == c->start) {
13198                 btrfs_set_root_generation(&root->root_item,
13199                                           trans->transid);
13200                 root->root_item.level = btrfs_header_level(root->node);
13201                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13202                                         &root->root_key, &root->root_item);
13203                 if (ret) {
13204                         free_extent_buffer(c);
13205                         return ret;
13206                 }
13207         }
13208         free_extent_buffer(old);
13209         root->node = c;
13210         add_root_to_dirty_list(root);
13211         return 0;
13212 }
13213
13214 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13215                                 struct extent_buffer *eb, int tree_root)
13216 {
13217         struct extent_buffer *tmp;
13218         struct btrfs_root_item *ri;
13219         struct btrfs_key key;
13220         u64 bytenr;
13221         int level = btrfs_header_level(eb);
13222         int nritems;
13223         int ret;
13224         int i;
13225
13226         /*
13227          * If we have pinned this block before, don't pin it again.
13228          * This can not only avoid forever loop with broken filesystem
13229          * but also give us some speedups.
13230          */
13231         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13232                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13233                 return 0;
13234
13235         btrfs_pin_extent(fs_info, eb->start, eb->len);
13236
13237         nritems = btrfs_header_nritems(eb);
13238         for (i = 0; i < nritems; i++) {
13239                 if (level == 0) {
13240                         btrfs_item_key_to_cpu(eb, &key, i);
13241                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13242                                 continue;
13243                         /* Skip the extent root and reloc roots */
13244                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13245                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13246                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13247                                 continue;
13248                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13249                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13250
13251                         /*
13252                          * If at any point we start needing the real root we
13253                          * will have to build a stump root for the root we are
13254                          * in, but for now this doesn't actually use the root so
13255                          * just pass in extent_root.
13256                          */
13257                         tmp = read_tree_block(fs_info, bytenr, 0);
13258                         if (!extent_buffer_uptodate(tmp)) {
13259                                 fprintf(stderr, "Error reading root block\n");
13260                                 return -EIO;
13261                         }
13262                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13263                         free_extent_buffer(tmp);
13264                         if (ret)
13265                                 return ret;
13266                 } else {
13267                         bytenr = btrfs_node_blockptr(eb, i);
13268
13269                         /* If we aren't the tree root don't read the block */
13270                         if (level == 1 && !tree_root) {
13271                                 btrfs_pin_extent(fs_info, bytenr,
13272                                                 fs_info->nodesize);
13273                                 continue;
13274                         }
13275
13276                         tmp = read_tree_block(fs_info, bytenr, 0);
13277                         if (!extent_buffer_uptodate(tmp)) {
13278                                 fprintf(stderr, "Error reading tree block\n");
13279                                 return -EIO;
13280                         }
13281                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13282                         free_extent_buffer(tmp);
13283                         if (ret)
13284                                 return ret;
13285                 }
13286         }
13287
13288         return 0;
13289 }
13290
13291 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13292 {
13293         int ret;
13294
13295         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13296         if (ret)
13297                 return ret;
13298
13299         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13300 }
13301
13302 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13303 {
13304         struct btrfs_block_group_cache *cache;
13305         struct btrfs_path path;
13306         struct extent_buffer *leaf;
13307         struct btrfs_chunk *chunk;
13308         struct btrfs_key key;
13309         int ret;
13310         u64 start;
13311
13312         btrfs_init_path(&path);
13313         key.objectid = 0;
13314         key.type = BTRFS_CHUNK_ITEM_KEY;
13315         key.offset = 0;
13316         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13317         if (ret < 0) {
13318                 btrfs_release_path(&path);
13319                 return ret;
13320         }
13321
13322         /*
13323          * We do this in case the block groups were screwed up and had alloc
13324          * bits that aren't actually set on the chunks.  This happens with
13325          * restored images every time and could happen in real life I guess.
13326          */
13327         fs_info->avail_data_alloc_bits = 0;
13328         fs_info->avail_metadata_alloc_bits = 0;
13329         fs_info->avail_system_alloc_bits = 0;
13330
13331         /* First we need to create the in-memory block groups */
13332         while (1) {
13333                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13334                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13335                         if (ret < 0) {
13336                                 btrfs_release_path(&path);
13337                                 return ret;
13338                         }
13339                         if (ret) {
13340                                 ret = 0;
13341                                 break;
13342                         }
13343                 }
13344                 leaf = path.nodes[0];
13345                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13346                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13347                         path.slots[0]++;
13348                         continue;
13349                 }
13350
13351                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13352                 btrfs_add_block_group(fs_info, 0,
13353                                       btrfs_chunk_type(leaf, chunk), key.offset,
13354                                       btrfs_chunk_length(leaf, chunk));
13355                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13356                                  key.offset + btrfs_chunk_length(leaf, chunk));
13357                 path.slots[0]++;
13358         }
13359         start = 0;
13360         while (1) {
13361                 cache = btrfs_lookup_first_block_group(fs_info, start);
13362                 if (!cache)
13363                         break;
13364                 cache->cached = 1;
13365                 start = cache->key.objectid + cache->key.offset;
13366         }
13367
13368         btrfs_release_path(&path);
13369         return 0;
13370 }
13371
13372 static int reset_balance(struct btrfs_trans_handle *trans,
13373                          struct btrfs_fs_info *fs_info)
13374 {
13375         struct btrfs_root *root = fs_info->tree_root;
13376         struct btrfs_path path;
13377         struct extent_buffer *leaf;
13378         struct btrfs_key key;
13379         int del_slot, del_nr = 0;
13380         int ret;
13381         int found = 0;
13382
13383         btrfs_init_path(&path);
13384         key.objectid = BTRFS_BALANCE_OBJECTID;
13385         key.type = BTRFS_BALANCE_ITEM_KEY;
13386         key.offset = 0;
13387         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13388         if (ret) {
13389                 if (ret > 0)
13390                         ret = 0;
13391                 if (!ret)
13392                         goto reinit_data_reloc;
13393                 else
13394                         goto out;
13395         }
13396
13397         ret = btrfs_del_item(trans, root, &path);
13398         if (ret)
13399                 goto out;
13400         btrfs_release_path(&path);
13401
13402         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13403         key.type = BTRFS_ROOT_ITEM_KEY;
13404         key.offset = 0;
13405         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13406         if (ret < 0)
13407                 goto out;
13408         while (1) {
13409                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13410                         if (!found)
13411                                 break;
13412
13413                         if (del_nr) {
13414                                 ret = btrfs_del_items(trans, root, &path,
13415                                                       del_slot, del_nr);
13416                                 del_nr = 0;
13417                                 if (ret)
13418                                         goto out;
13419                         }
13420                         key.offset++;
13421                         btrfs_release_path(&path);
13422
13423                         found = 0;
13424                         ret = btrfs_search_slot(trans, root, &key, &path,
13425                                                 -1, 1);
13426                         if (ret < 0)
13427                                 goto out;
13428                         continue;
13429                 }
13430                 found = 1;
13431                 leaf = path.nodes[0];
13432                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13433                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13434                         break;
13435                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13436                         path.slots[0]++;
13437                         continue;
13438                 }
13439                 if (!del_nr) {
13440                         del_slot = path.slots[0];
13441                         del_nr = 1;
13442                 } else {
13443                         del_nr++;
13444                 }
13445                 path.slots[0]++;
13446         }
13447
13448         if (del_nr) {
13449                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13450                 if (ret)
13451                         goto out;
13452         }
13453         btrfs_release_path(&path);
13454
13455 reinit_data_reloc:
13456         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13457         key.type = BTRFS_ROOT_ITEM_KEY;
13458         key.offset = (u64)-1;
13459         root = btrfs_read_fs_root(fs_info, &key);
13460         if (IS_ERR(root)) {
13461                 fprintf(stderr, "Error reading data reloc tree\n");
13462                 ret = PTR_ERR(root);
13463                 goto out;
13464         }
13465         record_root_in_trans(trans, root);
13466         ret = btrfs_fsck_reinit_root(trans, root, 0);
13467         if (ret)
13468                 goto out;
13469         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13470 out:
13471         btrfs_release_path(&path);
13472         return ret;
13473 }
13474
13475 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13476                               struct btrfs_fs_info *fs_info)
13477 {
13478         u64 start = 0;
13479         int ret;
13480
13481         /*
13482          * The only reason we don't do this is because right now we're just
13483          * walking the trees we find and pinning down their bytes, we don't look
13484          * at any of the leaves.  In order to do mixed groups we'd have to check
13485          * the leaves of any fs roots and pin down the bytes for any file
13486          * extents we find.  Not hard but why do it if we don't have to?
13487          */
13488         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13489                 fprintf(stderr, "We don't support re-initing the extent tree "
13490                         "for mixed block groups yet, please notify a btrfs "
13491                         "developer you want to do this so they can add this "
13492                         "functionality.\n");
13493                 return -EINVAL;
13494         }
13495
13496         /*
13497          * first we need to walk all of the trees except the extent tree and pin
13498          * down the bytes that are in use so we don't overwrite any existing
13499          * metadata.
13500          */
13501         ret = pin_metadata_blocks(fs_info);
13502         if (ret) {
13503                 fprintf(stderr, "error pinning down used bytes\n");
13504                 return ret;
13505         }
13506
13507         /*
13508          * Need to drop all the block groups since we're going to recreate all
13509          * of them again.
13510          */
13511         btrfs_free_block_groups(fs_info);
13512         ret = reset_block_groups(fs_info);
13513         if (ret) {
13514                 fprintf(stderr, "error resetting the block groups\n");
13515                 return ret;
13516         }
13517
13518         /* Ok we can allocate now, reinit the extent root */
13519         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13520         if (ret) {
13521                 fprintf(stderr, "extent root initialization failed\n");
13522                 /*
13523                  * When the transaction code is updated we should end the
13524                  * transaction, but for now progs only knows about commit so
13525                  * just return an error.
13526                  */
13527                 return ret;
13528         }
13529
13530         /*
13531          * Now we have all the in-memory block groups setup so we can make
13532          * allocations properly, and the metadata we care about is safe since we
13533          * pinned all of it above.
13534          */
13535         while (1) {
13536                 struct btrfs_block_group_cache *cache;
13537
13538                 cache = btrfs_lookup_first_block_group(fs_info, start);
13539                 if (!cache)
13540                         break;
13541                 start = cache->key.objectid + cache->key.offset;
13542                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13543                                         &cache->key, &cache->item,
13544                                         sizeof(cache->item));
13545                 if (ret) {
13546                         fprintf(stderr, "Error adding block group\n");
13547                         return ret;
13548                 }
13549                 btrfs_extent_post_op(trans, fs_info->extent_root);
13550         }
13551
13552         ret = reset_balance(trans, fs_info);
13553         if (ret)
13554                 fprintf(stderr, "error resetting the pending balance\n");
13555
13556         return ret;
13557 }
13558
13559 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13560 {
13561         struct btrfs_path path;
13562         struct btrfs_trans_handle *trans;
13563         struct btrfs_key key;
13564         int ret;
13565
13566         printf("Recowing metadata block %llu\n", eb->start);
13567         key.objectid = btrfs_header_owner(eb);
13568         key.type = BTRFS_ROOT_ITEM_KEY;
13569         key.offset = (u64)-1;
13570
13571         root = btrfs_read_fs_root(root->fs_info, &key);
13572         if (IS_ERR(root)) {
13573                 fprintf(stderr, "Couldn't find owner root %llu\n",
13574                         key.objectid);
13575                 return PTR_ERR(root);
13576         }
13577
13578         trans = btrfs_start_transaction(root, 1);
13579         if (IS_ERR(trans))
13580                 return PTR_ERR(trans);
13581
13582         btrfs_init_path(&path);
13583         path.lowest_level = btrfs_header_level(eb);
13584         if (path.lowest_level)
13585                 btrfs_node_key_to_cpu(eb, &key, 0);
13586         else
13587                 btrfs_item_key_to_cpu(eb, &key, 0);
13588
13589         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13590         btrfs_commit_transaction(trans, root);
13591         btrfs_release_path(&path);
13592         return ret;
13593 }
13594
13595 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13596 {
13597         struct btrfs_path path;
13598         struct btrfs_trans_handle *trans;
13599         struct btrfs_key key;
13600         int ret;
13601
13602         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13603                bad->key.type, bad->key.offset);
13604         key.objectid = bad->root_id;
13605         key.type = BTRFS_ROOT_ITEM_KEY;
13606         key.offset = (u64)-1;
13607
13608         root = btrfs_read_fs_root(root->fs_info, &key);
13609         if (IS_ERR(root)) {
13610                 fprintf(stderr, "Couldn't find owner root %llu\n",
13611                         key.objectid);
13612                 return PTR_ERR(root);
13613         }
13614
13615         trans = btrfs_start_transaction(root, 1);
13616         if (IS_ERR(trans))
13617                 return PTR_ERR(trans);
13618
13619         btrfs_init_path(&path);
13620         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13621         if (ret) {
13622                 if (ret > 0)
13623                         ret = 0;
13624                 goto out;
13625         }
13626         ret = btrfs_del_item(trans, root, &path);
13627 out:
13628         btrfs_commit_transaction(trans, root);
13629         btrfs_release_path(&path);
13630         return ret;
13631 }
13632
13633 static int zero_log_tree(struct btrfs_root *root)
13634 {
13635         struct btrfs_trans_handle *trans;
13636         int ret;
13637
13638         trans = btrfs_start_transaction(root, 1);
13639         if (IS_ERR(trans)) {
13640                 ret = PTR_ERR(trans);
13641                 return ret;
13642         }
13643         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13644         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13645         ret = btrfs_commit_transaction(trans, root);
13646         return ret;
13647 }
13648
13649 static int populate_csum(struct btrfs_trans_handle *trans,
13650                          struct btrfs_root *csum_root, char *buf, u64 start,
13651                          u64 len)
13652 {
13653         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13654         u64 offset = 0;
13655         u64 sectorsize;
13656         int ret = 0;
13657
13658         while (offset < len) {
13659                 sectorsize = fs_info->sectorsize;
13660                 ret = read_extent_data(fs_info, buf, start + offset,
13661                                        &sectorsize, 0);
13662                 if (ret)
13663                         break;
13664                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13665                                             start + offset, buf, sectorsize);
13666                 if (ret)
13667                         break;
13668                 offset += sectorsize;
13669         }
13670         return ret;
13671 }
13672
13673 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13674                                       struct btrfs_root *csum_root,
13675                                       struct btrfs_root *cur_root)
13676 {
13677         struct btrfs_path path;
13678         struct btrfs_key key;
13679         struct extent_buffer *node;
13680         struct btrfs_file_extent_item *fi;
13681         char *buf = NULL;
13682         u64 start = 0;
13683         u64 len = 0;
13684         int slot = 0;
13685         int ret = 0;
13686
13687         buf = malloc(cur_root->fs_info->sectorsize);
13688         if (!buf)
13689                 return -ENOMEM;
13690
13691         btrfs_init_path(&path);
13692         key.objectid = 0;
13693         key.offset = 0;
13694         key.type = 0;
13695         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13696         if (ret < 0)
13697                 goto out;
13698         /* Iterate all regular file extents and fill its csum */
13699         while (1) {
13700                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13701
13702                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13703                         goto next;
13704                 node = path.nodes[0];
13705                 slot = path.slots[0];
13706                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13707                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13708                         goto next;
13709                 start = btrfs_file_extent_disk_bytenr(node, fi);
13710                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13711
13712                 ret = populate_csum(trans, csum_root, buf, start, len);
13713                 if (ret == -EEXIST)
13714                         ret = 0;
13715                 if (ret < 0)
13716                         goto out;
13717 next:
13718                 /*
13719                  * TODO: if next leaf is corrupted, jump to nearest next valid
13720                  * leaf.
13721                  */
13722                 ret = btrfs_next_item(cur_root, &path);
13723                 if (ret < 0)
13724                         goto out;
13725                 if (ret > 0) {
13726                         ret = 0;
13727                         goto out;
13728                 }
13729         }
13730
13731 out:
13732         btrfs_release_path(&path);
13733         free(buf);
13734         return ret;
13735 }
13736
13737 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13738                                   struct btrfs_root *csum_root)
13739 {
13740         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13741         struct btrfs_path path;
13742         struct btrfs_root *tree_root = fs_info->tree_root;
13743         struct btrfs_root *cur_root;
13744         struct extent_buffer *node;
13745         struct btrfs_key key;
13746         int slot = 0;
13747         int ret = 0;
13748
13749         btrfs_init_path(&path);
13750         key.objectid = BTRFS_FS_TREE_OBJECTID;
13751         key.offset = 0;
13752         key.type = BTRFS_ROOT_ITEM_KEY;
13753         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13754         if (ret < 0)
13755                 goto out;
13756         if (ret > 0) {
13757                 ret = -ENOENT;
13758                 goto out;
13759         }
13760
13761         while (1) {
13762                 node = path.nodes[0];
13763                 slot = path.slots[0];
13764                 btrfs_item_key_to_cpu(node, &key, slot);
13765                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13766                         goto out;
13767                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13768                         goto next;
13769                 if (!is_fstree(key.objectid))
13770                         goto next;
13771                 key.offset = (u64)-1;
13772
13773                 cur_root = btrfs_read_fs_root(fs_info, &key);
13774                 if (IS_ERR(cur_root) || !cur_root) {
13775                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13776                                 key.objectid);
13777                         goto out;
13778                 }
13779                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13780                                 cur_root);
13781                 if (ret < 0)
13782                         goto out;
13783 next:
13784                 ret = btrfs_next_item(tree_root, &path);
13785                 if (ret > 0) {
13786                         ret = 0;
13787                         goto out;
13788                 }
13789                 if (ret < 0)
13790                         goto out;
13791         }
13792
13793 out:
13794         btrfs_release_path(&path);
13795         return ret;
13796 }
13797
13798 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13799                                       struct btrfs_root *csum_root)
13800 {
13801         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13802         struct btrfs_path path;
13803         struct btrfs_extent_item *ei;
13804         struct extent_buffer *leaf;
13805         char *buf;
13806         struct btrfs_key key;
13807         int ret;
13808
13809         btrfs_init_path(&path);
13810         key.objectid = 0;
13811         key.type = BTRFS_EXTENT_ITEM_KEY;
13812         key.offset = 0;
13813         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13814         if (ret < 0) {
13815                 btrfs_release_path(&path);
13816                 return ret;
13817         }
13818
13819         buf = malloc(csum_root->fs_info->sectorsize);
13820         if (!buf) {
13821                 btrfs_release_path(&path);
13822                 return -ENOMEM;
13823         }
13824
13825         while (1) {
13826                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13827                         ret = btrfs_next_leaf(extent_root, &path);
13828                         if (ret < 0)
13829                                 break;
13830                         if (ret) {
13831                                 ret = 0;
13832                                 break;
13833                         }
13834                 }
13835                 leaf = path.nodes[0];
13836
13837                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13838                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13839                         path.slots[0]++;
13840                         continue;
13841                 }
13842
13843                 ei = btrfs_item_ptr(leaf, path.slots[0],
13844                                     struct btrfs_extent_item);
13845                 if (!(btrfs_extent_flags(leaf, ei) &
13846                       BTRFS_EXTENT_FLAG_DATA)) {
13847                         path.slots[0]++;
13848                         continue;
13849                 }
13850
13851                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13852                                     key.offset);
13853                 if (ret)
13854                         break;
13855                 path.slots[0]++;
13856         }
13857
13858         btrfs_release_path(&path);
13859         free(buf);
13860         return ret;
13861 }
13862
/*
 * Recompute the data checksums and store them in the csum tree.
 *
 * Re-initializing the extent tree wipes all extent information, so in that
 * case the extent tree cannot be used as the source.  When @search_fs_tree is
 * non-zero the fs/subvolume trees are scanned instead; otherwise the extent
 * tree is scanned.
 *
 * Returns whatever the selected scanner returns.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        return search_fs_tree ? fill_csum_tree_from_fs(trans, csum_root)
                              : fill_csum_tree_from_extent(trans, csum_root);
}
13879
13880 static void free_roots_info_cache(void)
13881 {
13882         if (!roots_info_cache)
13883                 return;
13884
13885         while (!cache_tree_empty(roots_info_cache)) {
13886                 struct cache_extent *entry;
13887                 struct root_item_info *rii;
13888
13889                 entry = first_cache_extent(roots_info_cache);
13890                 if (!entry)
13891                         break;
13892                 remove_cache_extent(roots_info_cache, entry);
13893                 rii = container_of(entry, struct root_item_info, cache_extent);
13894                 free(rii);
13895         }
13896
13897         free(roots_info_cache);
13898         roots_info_cache = NULL;
13899 }
13900
13901 static int build_roots_info_cache(struct btrfs_fs_info *info)
13902 {
13903         int ret = 0;
13904         struct btrfs_key key;
13905         struct extent_buffer *leaf;
13906         struct btrfs_path path;
13907
13908         if (!roots_info_cache) {
13909                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13910                 if (!roots_info_cache)
13911                         return -ENOMEM;
13912                 cache_tree_init(roots_info_cache);
13913         }
13914
13915         btrfs_init_path(&path);
13916         key.objectid = 0;
13917         key.type = BTRFS_EXTENT_ITEM_KEY;
13918         key.offset = 0;
13919         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13920         if (ret < 0)
13921                 goto out;
13922         leaf = path.nodes[0];
13923
13924         while (1) {
13925                 struct btrfs_key found_key;
13926                 struct btrfs_extent_item *ei;
13927                 struct btrfs_extent_inline_ref *iref;
13928                 int slot = path.slots[0];
13929                 int type;
13930                 u64 flags;
13931                 u64 root_id;
13932                 u8 level;
13933                 struct cache_extent *entry;
13934                 struct root_item_info *rii;
13935
13936                 if (slot >= btrfs_header_nritems(leaf)) {
13937                         ret = btrfs_next_leaf(info->extent_root, &path);
13938                         if (ret < 0) {
13939                                 break;
13940                         } else if (ret) {
13941                                 ret = 0;
13942                                 break;
13943                         }
13944                         leaf = path.nodes[0];
13945                         slot = path.slots[0];
13946                 }
13947
13948                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
13949
13950                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
13951                     found_key.type != BTRFS_METADATA_ITEM_KEY)
13952                         goto next;
13953
13954                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
13955                 flags = btrfs_extent_flags(leaf, ei);
13956
13957                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
13958                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
13959                         goto next;
13960
13961                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
13962                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
13963                         level = found_key.offset;
13964                 } else {
13965                         struct btrfs_tree_block_info *binfo;
13966
13967                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
13968                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
13969                         level = btrfs_tree_block_level(leaf, binfo);
13970                 }
13971
13972                 /*
13973                  * For a root extent, it must be of the following type and the
13974                  * first (and only one) iref in the item.
13975                  */
13976                 type = btrfs_extent_inline_ref_type(leaf, iref);
13977                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
13978                         goto next;
13979
13980                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
13981                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
13982                 if (!entry) {
13983                         rii = malloc(sizeof(struct root_item_info));
13984                         if (!rii) {
13985                                 ret = -ENOMEM;
13986                                 goto out;
13987                         }
13988                         rii->cache_extent.start = root_id;
13989                         rii->cache_extent.size = 1;
13990                         rii->level = (u8)-1;
13991                         entry = &rii->cache_extent;
13992                         ret = insert_cache_extent(roots_info_cache, entry);
13993                         ASSERT(ret == 0);
13994                 } else {
13995                         rii = container_of(entry, struct root_item_info,
13996                                            cache_extent);
13997                 }
13998
13999                 ASSERT(rii->cache_extent.start == root_id);
14000                 ASSERT(rii->cache_extent.size == 1);
14001
14002                 if (level > rii->level || rii->level == (u8)-1) {
14003                         rii->level = level;
14004                         rii->bytenr = found_key.objectid;
14005                         rii->gen = btrfs_extent_generation(leaf, ei);
14006                         rii->node_count = 1;
14007                 } else if (level == rii->level) {
14008                         rii->node_count++;
14009                 }
14010 next:
14011                 path.slots[0]++;
14012         }
14013
14014 out:
14015         btrfs_release_path(&path);
14016
14017         return ret;
14018 }
14019
14020 static int maybe_repair_root_item(struct btrfs_path *path,
14021                                   const struct btrfs_key *root_key,
14022                                   const int read_only_mode)
14023 {
14024         const u64 root_id = root_key->objectid;
14025         struct cache_extent *entry;
14026         struct root_item_info *rii;
14027         struct btrfs_root_item ri;
14028         unsigned long offset;
14029
14030         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14031         if (!entry) {
14032                 fprintf(stderr,
14033                         "Error: could not find extent items for root %llu\n",
14034                         root_key->objectid);
14035                 return -ENOENT;
14036         }
14037
14038         rii = container_of(entry, struct root_item_info, cache_extent);
14039         ASSERT(rii->cache_extent.start == root_id);
14040         ASSERT(rii->cache_extent.size == 1);
14041
14042         if (rii->node_count != 1) {
14043                 fprintf(stderr,
14044                         "Error: could not find btree root extent for root %llu\n",
14045                         root_id);
14046                 return -ENOENT;
14047         }
14048
14049         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14050         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14051
14052         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14053             btrfs_root_level(&ri) != rii->level ||
14054             btrfs_root_generation(&ri) != rii->gen) {
14055
14056                 /*
14057                  * If we're in repair mode but our caller told us to not update
14058                  * the root item, i.e. just check if it needs to be updated, don't
14059                  * print this message, since the caller will call us again shortly
14060                  * for the same root item without read only mode (the caller will
14061                  * open a transaction first).
14062                  */
14063                 if (!(read_only_mode && repair))
14064                         fprintf(stderr,
14065                                 "%sroot item for root %llu,"
14066                                 " current bytenr %llu, current gen %llu, current level %u,"
14067                                 " new bytenr %llu, new gen %llu, new level %u\n",
14068                                 (read_only_mode ? "" : "fixing "),
14069                                 root_id,
14070                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14071                                 btrfs_root_level(&ri),
14072                                 rii->bytenr, rii->gen, rii->level);
14073
14074                 if (btrfs_root_generation(&ri) > rii->gen) {
14075                         fprintf(stderr,
14076                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14077                                 root_id, btrfs_root_generation(&ri), rii->gen);
14078                         return -EINVAL;
14079                 }
14080
14081                 if (!read_only_mode) {
14082                         btrfs_set_root_bytenr(&ri, rii->bytenr);
14083                         btrfs_set_root_level(&ri, rii->level);
14084                         btrfs_set_root_generation(&ri, rii->gen);
14085                         write_extent_buffer(path->nodes[0], &ri,
14086                                             offset, sizeof(ri));
14087                 }
14088
14089                 return 1;
14090         }
14091
14092         return 0;
14093 }
14094
14095 /*
14096  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14097  * caused read-only snapshots to be corrupted if they were created at a moment
14098  * when the source subvolume/snapshot had orphan items. The issue was that the
14099  * on-disk root items became incorrect, referring to the pre orphan cleanup root
14100  * node instead of the post orphan cleanup root node.
14101  * So this function, and its callees, just detects and fixes those cases. Even
14102  * though the regression was for read-only snapshots, this function applies to
14103  * any snapshot/subvolume root.
14104  * This must be run before any other repair code - not doing it so, makes other
14105  * repair code delete or modify backrefs in the extent tree for example, which
14106  * will result in an inconsistent fs after repairing the root items.
14107  */
14108 static int repair_root_items(struct btrfs_fs_info *info)
14109 {
14110         struct btrfs_path path;
14111         struct btrfs_key key;
14112         struct extent_buffer *leaf;
14113         struct btrfs_trans_handle *trans = NULL;
14114         int ret = 0;
14115         int bad_roots = 0;
14116         int need_trans = 0;
14117
14118         btrfs_init_path(&path);
14119
14120         ret = build_roots_info_cache(info);
14121         if (ret)
14122                 goto out;
14123
14124         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14125         key.type = BTRFS_ROOT_ITEM_KEY;
14126         key.offset = 0;
14127
14128 again:
14129         /*
14130          * Avoid opening and committing transactions if a leaf doesn't have
14131          * any root items that need to be fixed, so that we avoid rotating
14132          * backup roots unnecessarily.
14133          */
14134         if (need_trans) {
14135                 trans = btrfs_start_transaction(info->tree_root, 1);
14136                 if (IS_ERR(trans)) {
14137                         ret = PTR_ERR(trans);
14138                         goto out;
14139                 }
14140         }
14141
14142         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14143                                 0, trans ? 1 : 0);
14144         if (ret < 0)
14145                 goto out;
14146         leaf = path.nodes[0];
14147
14148         while (1) {
14149                 struct btrfs_key found_key;
14150
14151                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14152                         int no_more_keys = find_next_key(&path, &key);
14153
14154                         btrfs_release_path(&path);
14155                         if (trans) {
14156                                 ret = btrfs_commit_transaction(trans,
14157                                                                info->tree_root);
14158                                 trans = NULL;
14159                                 if (ret < 0)
14160                                         goto out;
14161                         }
14162                         need_trans = 0;
14163                         if (no_more_keys)
14164                                 break;
14165                         goto again;
14166                 }
14167
14168                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14169
14170                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14171                         goto next;
14172                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14173                         goto next;
14174
14175                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14176                 if (ret < 0)
14177                         goto out;
14178                 if (ret) {
14179                         if (!trans && repair) {
14180                                 need_trans = 1;
14181                                 key = found_key;
14182                                 btrfs_release_path(&path);
14183                                 goto again;
14184                         }
14185                         bad_roots++;
14186                 }
14187 next:
14188                 path.slots[0]++;
14189         }
14190         ret = 0;
14191 out:
14192         free_roots_info_cache();
14193         btrfs_release_path(&path);
14194         if (trans)
14195                 btrfs_commit_transaction(trans, info->tree_root);
14196         if (ret < 0)
14197                 return ret;
14198
14199         return bad_roots;
14200 }
14201
14202 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14203 {
14204         struct btrfs_trans_handle *trans;
14205         struct btrfs_block_group_cache *bg_cache;
14206         u64 current = 0;
14207         int ret = 0;
14208
14209         /* Clear all free space cache inodes and its extent data */
14210         while (1) {
14211                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14212                 if (!bg_cache)
14213                         break;
14214                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14215                 if (ret < 0)
14216                         return ret;
14217                 current = bg_cache->key.objectid + bg_cache->key.offset;
14218         }
14219
14220         /* Don't forget to set cache_generation to -1 */
14221         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14222         if (IS_ERR(trans)) {
14223                 error("failed to update super block cache generation");
14224                 return PTR_ERR(trans);
14225         }
14226         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14227         btrfs_commit_transaction(trans, fs_info->tree_root);
14228
14229         return ret;
14230 }
14231
14232 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14233                 int clear_version)
14234 {
14235         int ret = 0;
14236
14237         if (clear_version == 1) {
14238                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14239                         error(
14240                 "free space cache v2 detected, use --clear-space-cache v2");
14241                         ret = 1;
14242                         goto close_out;
14243                 }
14244                 printf("Clearing free space cache\n");
14245                 ret = clear_free_space_cache(fs_info);
14246                 if (ret) {
14247                         error("failed to clear free space cache");
14248                         ret = 1;
14249                 } else {
14250                         printf("Free space cache cleared\n");
14251                 }
14252         } else if (clear_version == 2) {
14253                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14254                         printf("no free space cache v2 to clear\n");
14255                         ret = 0;
14256                         goto close_out;
14257                 }
14258                 printf("Clear free space cache v2\n");
14259                 ret = btrfs_clear_free_space_tree(fs_info);
14260                 if (ret) {
14261                         error("failed to clear free space cache v2: %d", ret);
14262                         ret = 1;
14263                 } else {
14264                         printf("free space cache v2 cleared\n");
14265                 }
14266         }
14267 close_out:
14268         return ret;
14269 }
14270
/*
 * Usage/help text for "btrfs check", as a NULL-terminated array of lines.
 *
 * The first entry is the synopsis and the second a one-line summary; the
 * entries up to the empty string form the long description, and the
 * remaining entries describe the individual options.
 * NOTE(review): the exact layout expected by the help printer is defined
 * outside this section -- confirm before reordering entries.
 */
const char * const cmd_check_usage[] = {
        "btrfs check [options] <device>",
        "Check structural integrity of a filesystem (unmounted).",
        "Check structural integrity of an unmounted filesystem. Verify internal",
        "trees' consistency and item connectivity. In the repair mode try to",
        "fix the problems found. ",
        "WARNING: the repair mode is considered dangerous",
        "",
        "-s|--super <superblock>     use this superblock copy",
        "-b|--backup                 use the first valid backup root copy",
        "--force                     skip mount checks, repair is not possible",
        "--repair                    try to repair the filesystem",
        "--readonly                  run in read-only mode (default)",
        "--init-csum-tree            create a new CRC tree",
        "--init-extent-tree          create a new extent tree",
        "--mode <MODE>               allows choice of memory/IO trade-offs",
        "                            where MODE is one of:",
        "                            original - read inodes and extents to memory (requires",
        "                                       more memory, does less IO)",
        "                            lowmem   - try to use less memory but read blocks again",
        "                                       when needed",
        "--check-data-csum           verify checksums of data blocks",
        "-Q|--qgroup-report          print a report on qgroup consistency",
        "-E|--subvol-extents <subvolid>",
        "                            print subvolume extents and sharing state",
        "-r|--tree-root <bytenr>     use the given bytenr for the tree root",
        "--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
        "-p|--progress               indicate progress",
        "--clear-space-cache v1|v2   clear space cache for v1 or v2",
        /* Terminator for the list. */
        NULL
};
14302
14303 int cmd_check(int argc, char **argv)
14304 {
14305         struct cache_tree root_cache;
14306         struct btrfs_root *root;
14307         struct btrfs_fs_info *info;
14308         u64 bytenr = 0;
14309         u64 subvolid = 0;
14310         u64 tree_root_bytenr = 0;
14311         u64 chunk_root_bytenr = 0;
14312         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14313         int ret = 0;
14314         int err = 0;
14315         u64 num;
14316         int init_csum_tree = 0;
14317         int readonly = 0;
14318         int clear_space_cache = 0;
14319         int qgroup_report = 0;
14320         int qgroups_repaired = 0;
14321         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14322         int force = 0;
14323
14324         while(1) {
14325                 int c;
14326                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14327                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14328                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14329                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14330                         GETOPT_VAL_FORCE };
14331                 static const struct option long_options[] = {
14332                         { "super", required_argument, NULL, 's' },
14333                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14334                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14335                         { "init-csum-tree", no_argument, NULL,
14336                                 GETOPT_VAL_INIT_CSUM },
14337                         { "init-extent-tree", no_argument, NULL,
14338                                 GETOPT_VAL_INIT_EXTENT },
14339                         { "check-data-csum", no_argument, NULL,
14340                                 GETOPT_VAL_CHECK_CSUM },
14341                         { "backup", no_argument, NULL, 'b' },
14342                         { "subvol-extents", required_argument, NULL, 'E' },
14343                         { "qgroup-report", no_argument, NULL, 'Q' },
14344                         { "tree-root", required_argument, NULL, 'r' },
14345                         { "chunk-root", required_argument, NULL,
14346                                 GETOPT_VAL_CHUNK_TREE },
14347                         { "progress", no_argument, NULL, 'p' },
14348                         { "mode", required_argument, NULL,
14349                                 GETOPT_VAL_MODE },
14350                         { "clear-space-cache", required_argument, NULL,
14351                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14352                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14353                         { NULL, 0, NULL, 0}
14354                 };
14355
14356                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14357                 if (c < 0)
14358                         break;
14359                 switch(c) {
14360                         case 'a': /* ignored */ break;
14361                         case 'b':
14362                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14363                                 break;
14364                         case 's':
14365                                 num = arg_strtou64(optarg);
14366                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14367                                         error(
14368                                         "super mirror should be less than %d",
14369                                                 BTRFS_SUPER_MIRROR_MAX);
14370                                         exit(1);
14371                                 }
14372                                 bytenr = btrfs_sb_offset(((int)num));
14373                                 printf("using SB copy %llu, bytenr %llu\n", num,
14374                                        (unsigned long long)bytenr);
14375                                 break;
14376                         case 'Q':
14377                                 qgroup_report = 1;
14378                                 break;
14379                         case 'E':
14380                                 subvolid = arg_strtou64(optarg);
14381                                 break;
14382                         case 'r':
14383                                 tree_root_bytenr = arg_strtou64(optarg);
14384                                 break;
14385                         case GETOPT_VAL_CHUNK_TREE:
14386                                 chunk_root_bytenr = arg_strtou64(optarg);
14387                                 break;
14388                         case 'p':
14389                                 ctx.progress_enabled = true;
14390                                 break;
14391                         case '?':
14392                         case 'h':
14393                                 usage(cmd_check_usage);
14394                         case GETOPT_VAL_REPAIR:
14395                                 printf("enabling repair mode\n");
14396                                 repair = 1;
14397                                 ctree_flags |= OPEN_CTREE_WRITES;
14398                                 break;
14399                         case GETOPT_VAL_READONLY:
14400                                 readonly = 1;
14401                                 break;
14402                         case GETOPT_VAL_INIT_CSUM:
14403                                 printf("Creating a new CRC tree\n");
14404                                 init_csum_tree = 1;
14405                                 repair = 1;
14406                                 ctree_flags |= OPEN_CTREE_WRITES;
14407                                 break;
14408                         case GETOPT_VAL_INIT_EXTENT:
14409                                 init_extent_tree = 1;
14410                                 ctree_flags |= (OPEN_CTREE_WRITES |
14411                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14412                                 repair = 1;
14413                                 break;
14414                         case GETOPT_VAL_CHECK_CSUM:
14415                                 check_data_csum = 1;
14416                                 break;
14417                         case GETOPT_VAL_MODE:
14418                                 check_mode = parse_check_mode(optarg);
14419                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14420                                         error("unknown mode: %s", optarg);
14421                                         exit(1);
14422                                 }
14423                                 break;
14424                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14425                                 if (strcmp(optarg, "v1") == 0) {
14426                                         clear_space_cache = 1;
14427                                 } else if (strcmp(optarg, "v2") == 0) {
14428                                         clear_space_cache = 2;
14429                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14430                                 } else {
14431                                         error(
14432                 "invalid argument to --clear-space-cache, must be v1 or v2");
14433                                         exit(1);
14434                                 }
14435                                 ctree_flags |= OPEN_CTREE_WRITES;
14436                                 break;
14437                         case GETOPT_VAL_FORCE:
14438                                 force = 1;
14439                                 break;
14440                 }
14441         }
14442
14443         if (check_argc_exact(argc - optind, 1))
14444                 usage(cmd_check_usage);
14445
14446         if (ctx.progress_enabled) {
14447                 ctx.tp = TASK_NOTHING;
14448                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14449         }
14450
14451         /* This check is the only reason for --readonly to exist */
14452         if (readonly && repair) {
14453                 error("repair options are not compatible with --readonly");
14454                 exit(1);
14455         }
14456
14457         /*
14458          * experimental and dangerous
14459          */
14460         if (repair && check_mode == CHECK_MODE_LOWMEM)
14461                 warning("low-memory mode repair support is only partial");
14462
14463         radix_tree_init();
14464         cache_tree_init(&root_cache);
14465
14466         ret = check_mounted(argv[optind]);
14467         if (!force) {
14468                 if (ret < 0) {
14469                         error("could not check mount status: %s",
14470                                         strerror(-ret));
14471                         err |= !!ret;
14472                         goto err_out;
14473                 } else if (ret) {
14474                         error(
14475 "%s is currently mounted, use --force if you really intend to check the filesystem",
14476                                 argv[optind]);
14477                         ret = -EBUSY;
14478                         err |= !!ret;
14479                         goto err_out;
14480                 }
14481         } else {
14482                 if (repair) {
14483                         error("repair and --force is not yet supported");
14484                         ret = 1;
14485                         err |= !!ret;
14486                         goto err_out;
14487                 }
14488                 if (ret < 0) {
14489                         warning(
14490 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14491                                 argv[optind]);
14492                 } else if (ret) {
14493                         warning(
14494                         "filesystem mounted, continuing because of --force");
14495                 }
14496                 /* A block device is mounted in exclusive mode by kernel */
14497                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14498         }
14499
14500         /* only allow partial opening under repair mode */
14501         if (repair)
14502                 ctree_flags |= OPEN_CTREE_PARTIAL;
14503
14504         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14505                                   chunk_root_bytenr, ctree_flags);
14506         if (!info) {
14507                 error("cannot open file system");
14508                 ret = -EIO;
14509                 err |= !!ret;
14510                 goto err_out;
14511         }
14512
14513         global_info = info;
14514         root = info->fs_root;
14515         uuid_unparse(info->super_copy->fsid, uuidbuf);
14516
14517         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14518
14519         /*
14520          * Check the bare minimum before starting anything else that could rely
14521          * on it, namely the tree roots, any local consistency checks
14522          */
14523         if (!extent_buffer_uptodate(info->tree_root->node) ||
14524             !extent_buffer_uptodate(info->dev_root->node) ||
14525             !extent_buffer_uptodate(info->chunk_root->node)) {
14526                 error("critical roots corrupted, unable to check the filesystem");
14527                 err |= !!ret;
14528                 ret = -EIO;
14529                 goto close_out;
14530         }
14531
14532         if (clear_space_cache) {
14533                 ret = do_clear_free_space_cache(info, clear_space_cache);
14534                 err |= !!ret;
14535                 goto close_out;
14536         }
14537
14538         /*
14539          * repair mode will force us to commit transaction which
14540          * will make us fail to load log tree when mounting.
14541          */
14542         if (repair && btrfs_super_log_root(info->super_copy)) {
14543                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14544                 if (!ret) {
14545                         ret = 1;
14546                         err |= !!ret;
14547                         goto close_out;
14548                 }
14549                 ret = zero_log_tree(root);
14550                 err |= !!ret;
14551                 if (ret) {
14552                         error("failed to zero log tree: %d", ret);
14553                         goto close_out;
14554                 }
14555         }
14556
14557         if (qgroup_report) {
14558                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14559                        uuidbuf);
14560                 ret = qgroup_verify_all(info);
14561                 err |= !!ret;
14562                 if (ret == 0)
14563                         report_qgroups(1);
14564                 goto close_out;
14565         }
14566         if (subvolid) {
14567                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14568                        subvolid, argv[optind], uuidbuf);
14569                 ret = print_extent_state(info, subvolid);
14570                 err |= !!ret;
14571                 goto close_out;
14572         }
14573
14574         if (init_extent_tree || init_csum_tree) {
14575                 struct btrfs_trans_handle *trans;
14576
14577                 trans = btrfs_start_transaction(info->extent_root, 0);
14578                 if (IS_ERR(trans)) {
14579                         error("error starting transaction");
14580                         ret = PTR_ERR(trans);
14581                         err |= !!ret;
14582                         goto close_out;
14583                 }
14584
14585                 if (init_extent_tree) {
14586                         printf("Creating a new extent tree\n");
14587                         ret = reinit_extent_tree(trans, info);
14588                         err |= !!ret;
14589                         if (ret)
14590                                 goto close_out;
14591                 }
14592
14593                 if (init_csum_tree) {
14594                         printf("Reinitialize checksum tree\n");
14595                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14596                         if (ret) {
14597                                 error("checksum tree initialization failed: %d",
14598                                                 ret);
14599                                 ret = -EIO;
14600                                 err |= !!ret;
14601                                 goto close_out;
14602                         }
14603
14604                         ret = fill_csum_tree(trans, info->csum_root,
14605                                              init_extent_tree);
14606                         err |= !!ret;
14607                         if (ret) {
14608                                 error("checksum tree refilling failed: %d", ret);
14609                                 return -EIO;
14610                         }
14611                 }
14612                 /*
14613                  * Ok now we commit and run the normal fsck, which will add
14614                  * extent entries for all of the items it finds.
14615                  */
14616                 ret = btrfs_commit_transaction(trans, info->extent_root);
14617                 err |= !!ret;
14618                 if (ret)
14619                         goto close_out;
14620         }
14621         if (!extent_buffer_uptodate(info->extent_root->node)) {
14622                 error("critical: extent_root, unable to check the filesystem");
14623                 ret = -EIO;
14624                 err |= !!ret;
14625                 goto close_out;
14626         }
14627         if (!extent_buffer_uptodate(info->csum_root->node)) {
14628                 error("critical: csum_root, unable to check the filesystem");
14629                 ret = -EIO;
14630                 err |= !!ret;
14631                 goto close_out;
14632         }
14633
14634         if (!init_extent_tree) {
14635                 ret = repair_root_items(info);
14636                 if (ret < 0) {
14637                         err = !!ret;
14638                         error("failed to repair root items: %s", strerror(-ret));
14639                         goto close_out;
14640                 }
14641                 if (repair) {
14642                         fprintf(stderr, "Fixed %d roots.\n", ret);
14643                         ret = 0;
14644                 } else if (ret > 0) {
14645                         fprintf(stderr,
14646                                 "Found %d roots with an outdated root item.\n",
14647                                 ret);
14648                         fprintf(stderr,
14649         "Please run a filesystem check with the option --repair to fix them.\n");
14650                         ret = 1;
14651                         err |= ret;
14652                         goto close_out;
14653                 }
14654         }
14655
14656         ret = do_check_chunks_and_extents(info);
14657         err |= !!ret;
14658         if (ret)
14659                 error(
14660                 "errors found in extent allocation tree or chunk allocation");
14661
14662         /* Only re-check super size after we checked and repaired the fs */
14663         err |= !is_super_size_valid(info);
14664
14665         if (!ctx.progress_enabled) {
14666                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14667                         fprintf(stderr, "checking free space tree\n");
14668                 else
14669                         fprintf(stderr, "checking free space cache\n");
14670         }
14671         ret = check_space_cache(root);
14672         err |= !!ret;
14673         if (ret) {
14674                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14675                         error("errors found in free space tree");
14676                 else
14677                         error("errors found in free space cache");
14678                 goto out;
14679         }
14680
14681         /*
14682          * We used to have to have these hole extents in between our real
14683          * extents so if we don't have this flag set we need to make sure there
14684          * are no gaps in the file extents for inodes, otherwise we can just
14685          * ignore it when this happens.
14686          */
14687         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14688         ret = do_check_fs_roots(info, &root_cache);
14689         err |= !!ret;
14690         if (ret) {
14691                 error("errors found in fs roots");
14692                 goto out;
14693         }
14694
14695         fprintf(stderr, "checking csums\n");
14696         ret = check_csums(root);
14697         err |= !!ret;
14698         if (ret) {
14699                 error("errors found in csum tree");
14700                 goto out;
14701         }
14702
14703         fprintf(stderr, "checking root refs\n");
14704         /* For low memory mode, check_fs_roots_v2 handles root refs */
14705         if (check_mode != CHECK_MODE_LOWMEM) {
14706                 ret = check_root_refs(root, &root_cache);
14707                 err |= !!ret;
14708                 if (ret) {
14709                         error("errors found in root refs");
14710                         goto out;
14711                 }
14712         }
14713
14714         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14715                 struct extent_buffer *eb;
14716
14717                 eb = list_first_entry(&root->fs_info->recow_ebs,
14718                                       struct extent_buffer, recow);
14719                 list_del_init(&eb->recow);
14720                 ret = recow_extent_buffer(root, eb);
14721                 err |= !!ret;
14722                 if (ret) {
14723                         error("fails to fix transid errors");
14724                         break;
14725                 }
14726         }
14727
14728         while (!list_empty(&delete_items)) {
14729                 struct bad_item *bad;
14730
14731                 bad = list_first_entry(&delete_items, struct bad_item, list);
14732                 list_del_init(&bad->list);
14733                 if (repair) {
14734                         ret = delete_bad_item(root, bad);
14735                         err |= !!ret;
14736                 }
14737                 free(bad);
14738         }
14739
14740         if (info->quota_enabled) {
14741                 fprintf(stderr, "checking quota groups\n");
14742                 ret = qgroup_verify_all(info);
14743                 err |= !!ret;
14744                 if (ret) {
14745                         error("failed to check quota groups");
14746                         goto out;
14747                 }
14748                 report_qgroups(0);
14749                 ret = repair_qgroups(info, &qgroups_repaired);
14750                 err |= !!ret;
14751                 if (err) {
14752                         error("failed to repair quota groups");
14753                         goto out;
14754                 }
14755                 ret = 0;
14756         }
14757
14758         if (!list_empty(&root->fs_info->recow_ebs)) {
14759                 error("transid errors in file system");
14760                 ret = 1;
14761                 err |= !!ret;
14762         }
14763 out:
14764         printf("found %llu bytes used, ",
14765                (unsigned long long)bytes_used);
14766         if (err)
14767                 printf("error(s) found\n");
14768         else
14769                 printf("no error found\n");
14770         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14771         printf("total tree bytes: %llu\n",
14772                (unsigned long long)total_btree_bytes);
14773         printf("total fs tree bytes: %llu\n",
14774                (unsigned long long)total_fs_tree_bytes);
14775         printf("total extent tree bytes: %llu\n",
14776                (unsigned long long)total_extent_tree_bytes);
14777         printf("btree space waste bytes: %llu\n",
14778                (unsigned long long)btree_space_waste);
14779         printf("file data blocks allocated: %llu\n referenced %llu\n",
14780                 (unsigned long long)data_bytes_allocated,
14781                 (unsigned long long)data_bytes_referenced);
14782
14783         free_qgroup_counts();
14784         free_root_recs_tree(&root_cache);
14785 close_out:
14786         close_ctree(root);
14787 err_out:
14788         if (ctx.progress_enabled)
14789                 task_deinit(ctx.info);
14790
14791         return err;
14792 }