ee76bdbecff9f3ee89bfa58dad4c94729bbe7f99
[platform/upstream/btrfs-progs.git] / check / main.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <fcntl.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 #include <getopt.h>
27 #include <uuid/uuid.h>
28 #include "ctree.h"
29 #include "volumes.h"
30 #include "repair.h"
31 #include "disk-io.h"
32 #include "print-tree.h"
33 #include "task-utils.h"
34 #include "transaction.h"
35 #include "utils.h"
36 #include "commands.h"
37 #include "free-space-cache.h"
38 #include "free-space-tree.h"
39 #include "btrfsck.h"
40 #include "qgroup-verify.h"
41 #include "rbtree-utils.h"
42 #include "backref.h"
43 #include "kernel-shared/ulist.h"
44 #include "hash.h"
45 #include "help.h"
46 #include "check/original.h"
47
/*
 * Phases that the progress indicator can report.
 *
 * print_status_check() uses the value as an index into its label table, so
 * every phase listed before TASK_NOTHING needs a label there.
 */
enum task_position {
	TASK_EXTENTS = 0,
	TASK_FREE_SPACE = 1,
	TASK_FS_ROOTS = 2,
	/* Nothing to report; has to be the last element */
	TASK_NOTHING = 3,
};

/* State shared with the progress indicator callbacks. */
struct task_ctx {
	int progress_enabled;
	/* Phase to display, read by print_status_check() */
	enum task_position tp;

	/* Handle given to task_period_start() and task_period_wait() */
	struct task_info *info;
};
61
62 static u64 bytes_used = 0;
63 static u64 total_csum_bytes = 0;
64 static u64 total_btree_bytes = 0;
65 static u64 total_fs_tree_bytes = 0;
66 static u64 total_extent_tree_bytes = 0;
67 static u64 btree_space_waste = 0;
68 static u64 data_bytes_allocated = 0;
69 static u64 data_bytes_referenced = 0;
70 static LIST_HEAD(duplicate_extents);
71 static LIST_HEAD(delete_items);
72 static int no_holes = 0;
73 static int init_extent_tree = 0;
74 static int check_data_csum = 0;
75 static struct btrfs_fs_info *global_info;
76 static struct task_ctx ctx = { 0 };
77 static struct cache_tree *roots_info_cache = NULL;
78
/* Check implementations that can be selected by name, see parse_check_mode(). */
enum btrfs_check_mode {
	CHECK_MODE_ORIGINAL = 0,
	CHECK_MODE_LOWMEM = 1,
	/* Result of parsing an unrecognized mode name */
	CHECK_MODE_UNKNOWN = 2,
	/* Alias of the original mode, not a mode of its own */
	CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};

/* Mode in effect; starts as the default. */
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
87
/*
 * Error flag bits. Each macro is a distinct single bit; bit 0 is not
 * assigned.
 */
#define ROOT_DIR_ERROR		(1 << 1)  /* bad ROOT_DIR */
#define DIR_ITEM_MISSING	(1 << 2)  /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH	(1 << 3)  /* DIR_ITEM found but not match */
#define INODE_REF_MISSING	(1 << 4)  /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING	(1 << 5)  /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH	(1 << 6)  /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR	(1 << 7)  /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM		(1 << 8)  /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING	(1 << 9)  /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR	(1 << 10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR		(1 << 11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR		(1 << 12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM		(1 << 13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM		(1 << 14) /* no inode_item */
#define LAST_ITEM		(1 << 15) /* Complete this tree traversal */
#define ROOT_REF_MISSING	(1 << 16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH	(1 << 17) /* ROOT_REF found but not match */
#define DIR_INDEX_MISSING	(1 << 18) /* INODE_INDEX not found */
#define DIR_INDEX_MISMATCH	(1 << 19) /* INODE_INDEX found but not match */
#define DIR_COUNT_AGAIN		(1 << 20) /* DIR isize should be recalculated */
#define BG_ACCOUNTING_ERROR	(1 << 21) /* Block group accounting error */
109
110 static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
111 {
112         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
113         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
114         struct data_backref *back1 = to_data_backref(ext1);
115         struct data_backref *back2 = to_data_backref(ext2);
116
117         WARN_ON(!ext1->is_data);
118         WARN_ON(!ext2->is_data);
119
120         /* parent and root are a union, so this covers both */
121         if (back1->parent > back2->parent)
122                 return 1;
123         if (back1->parent < back2->parent)
124                 return -1;
125
126         /* This is a full backref and the parents match. */
127         if (back1->node.full_backref)
128                 return 0;
129
130         if (back1->owner > back2->owner)
131                 return 1;
132         if (back1->owner < back2->owner)
133                 return -1;
134
135         if (back1->offset > back2->offset)
136                 return 1;
137         if (back1->offset < back2->offset)
138                 return -1;
139
140         if (back1->found_ref && back2->found_ref) {
141                 if (back1->disk_bytenr > back2->disk_bytenr)
142                         return 1;
143                 if (back1->disk_bytenr < back2->disk_bytenr)
144                         return -1;
145
146                 if (back1->bytes > back2->bytes)
147                         return 1;
148                 if (back1->bytes < back2->bytes)
149                         return -1;
150         }
151
152         return 0;
153 }
154
155 static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
156 {
157         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
158         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
159         struct tree_backref *back1 = to_tree_backref(ext1);
160         struct tree_backref *back2 = to_tree_backref(ext2);
161
162         WARN_ON(ext1->is_data);
163         WARN_ON(ext2->is_data);
164
165         /* parent and root are a union, so this covers both */
166         if (back1->parent > back2->parent)
167                 return 1;
168         if (back1->parent < back2->parent)
169                 return -1;
170
171         return 0;
172 }
173
174 static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
175 {
176         struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
177         struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
178
179         if (ext1->is_data > ext2->is_data)
180                 return 1;
181
182         if (ext1->is_data < ext2->is_data)
183                 return -1;
184
185         if (ext1->full_backref > ext2->full_backref)
186                 return 1;
187         if (ext1->full_backref < ext2->full_backref)
188                 return -1;
189
190         if (ext1->is_data)
191                 return compare_data_backref(node1, node2);
192         else
193                 return compare_tree_backref(node1, node2);
194 }
195
/*
 * Error bit for low memory mode check.
 *
 * Currently no caller cares about it yet.  Just internal use for error
 * classification.
 *
 * Every flag owns its own bit, so testing for one condition can never match
 * a different one.
 */
#define BACKREF_MISSING		(1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH	(1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED		(1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING	(1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH	(1 << 4) /* Referencer found but does not match */
#define ITEM_SIZE_MISMATCH	(1 << 5) /* Bad item size */
#define UNKNOWN_TYPE		(1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH	(1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH	(1 << 8)
/* Used to share bit 4 with REFERENCER_MISMATCH; moved to a free bit */
#define CROSSING_STRIPE_BOUNDARY (1 << 9) /* For kernel scrub workaround */
212
213 static void *print_status_check(void *p)
214 {
215         struct task_ctx *priv = p;
216         const char work_indicator[] = { '.', 'o', 'O', 'o' };
217         uint32_t count = 0;
218         static char *task_position_string[] = {
219                 "checking extents",
220                 "checking free space cache",
221                 "checking fs roots",
222         };
223
224         task_period_start(priv->info, 1000 /* 1s */);
225
226         if (priv->tp == TASK_NOTHING)
227                 return NULL;
228
229         while (1) {
230                 printf("%s [%c]\r", task_position_string[priv->tp],
231                                 work_indicator[count % 4]);
232                 count++;
233                 fflush(stdout);
234                 task_period_wait(priv->info);
235         }
236         return NULL;
237 }
238
/*
 * Finish the progress line: emit a newline on stdout and flush it.
 *
 * @p is not used. Always returns 0.
 */
static int print_status_return(void *p)
{
	fputs("\n", stdout);
	fflush(stdout);

	return 0;
}
246
247 static enum btrfs_check_mode parse_check_mode(const char *str)
248 {
249         if (strcmp(str, "lowmem") == 0)
250                 return CHECK_MODE_LOWMEM;
251         if (strcmp(str, "orig") == 0)
252                 return CHECK_MODE_ORIGINAL;
253         if (strcmp(str, "original") == 0)
254                 return CHECK_MODE_ORIGINAL;
255
256         return CHECK_MODE_UNKNOWN;
257 }
258
259 /* Compatible function to allow reuse of old codes */
260 static u64 first_extent_gap(struct rb_root *holes)
261 {
262         struct file_extent_hole *hole;
263
264         if (RB_EMPTY_ROOT(holes))
265                 return (u64)-1;
266
267         hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
268         return hole->start;
269 }
270
271 static int compare_hole(struct rb_node *node1, struct rb_node *node2)
272 {
273         struct file_extent_hole *hole1;
274         struct file_extent_hole *hole2;
275
276         hole1 = rb_entry(node1, struct file_extent_hole, node);
277         hole2 = rb_entry(node2, struct file_extent_hole, node);
278
279         if (hole1->start > hole2->start)
280                 return -1;
281         if (hole1->start < hole2->start)
282                 return 1;
283         /* Now hole1->start == hole2->start */
284         if (hole1->len >= hole2->len)
285                 /*
286                  * Hole 1 will be merge center
287                  * Same hole will be merged later
288                  */
289                 return -1;
290         /* Hole 2 will be merge center */
291         return 1;
292 }
293
294 /*
295  * Add a hole to the record
296  *
297  * This will do hole merge for copy_file_extent_holes(),
298  * which will ensure there won't be continuous holes.
299  */
300 static int add_file_extent_hole(struct rb_root *holes,
301                                 u64 start, u64 len)
302 {
303         struct file_extent_hole *hole;
304         struct file_extent_hole *prev = NULL;
305         struct file_extent_hole *next = NULL;
306
307         hole = malloc(sizeof(*hole));
308         if (!hole)
309                 return -ENOMEM;
310         hole->start = start;
311         hole->len = len;
312         /* Since compare will not return 0, no -EEXIST will happen */
313         rb_insert(holes, &hole->node, compare_hole);
314
315         /* simple merge with previous hole */
316         if (rb_prev(&hole->node))
317                 prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
318                                 node);
319         if (prev && prev->start + prev->len >= hole->start) {
320                 hole->len = hole->start + hole->len - prev->start;
321                 hole->start = prev->start;
322                 rb_erase(&prev->node, holes);
323                 free(prev);
324                 prev = NULL;
325         }
326
327         /* iterate merge with next holes */
328         while (1) {
329                 if (!rb_next(&hole->node))
330                         break;
331                 next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
332                                         node);
333                 if (hole->start + hole->len >= next->start) {
334                         if (hole->start + hole->len <= next->start + next->len)
335                                 hole->len = next->start + next->len -
336                                             hole->start;
337                         rb_erase(&next->node, holes);
338                         free(next);
339                         next = NULL;
340                 } else
341                         break;
342         }
343         return 0;
344 }
345
346 static int compare_hole_range(struct rb_node *node, void *data)
347 {
348         struct file_extent_hole *hole;
349         u64 start;
350
351         hole = (struct file_extent_hole *)data;
352         start = hole->start;
353
354         hole = rb_entry(node, struct file_extent_hole, node);
355         if (start < hole->start)
356                 return -1;
357         if (start >= hole->start && start < hole->start + hole->len)
358                 return 0;
359         return 1;
360 }
361
362 /*
363  * Delete a hole in the record
364  *
365  * This will do the hole split and is much restrict than add.
366  */
367 static int del_file_extent_hole(struct rb_root *holes,
368                                 u64 start, u64 len)
369 {
370         struct file_extent_hole *hole;
371         struct file_extent_hole tmp;
372         u64 prev_start = 0;
373         u64 prev_len = 0;
374         u64 next_start = 0;
375         u64 next_len = 0;
376         struct rb_node *node;
377         int have_prev = 0;
378         int have_next = 0;
379         int ret = 0;
380
381         tmp.start = start;
382         tmp.len = len;
383         node = rb_search(holes, &tmp, compare_hole_range, NULL);
384         if (!node)
385                 return -EEXIST;
386         hole = rb_entry(node, struct file_extent_hole, node);
387         if (start + len > hole->start + hole->len)
388                 return -EEXIST;
389
390         /*
391          * Now there will be no overlap, delete the hole and re-add the
392          * split(s) if they exists.
393          */
394         if (start > hole->start) {
395                 prev_start = hole->start;
396                 prev_len = start - hole->start;
397                 have_prev = 1;
398         }
399         if (hole->start + hole->len > start + len) {
400                 next_start = start + len;
401                 next_len = hole->start + hole->len - start - len;
402                 have_next = 1;
403         }
404         rb_erase(node, holes);
405         free(hole);
406         if (have_prev) {
407                 ret = add_file_extent_hole(holes, prev_start, prev_len);
408                 if (ret < 0)
409                         return ret;
410         }
411         if (have_next) {
412                 ret = add_file_extent_hole(holes, next_start, next_len);
413                 if (ret < 0)
414                         return ret;
415         }
416         return 0;
417 }
418
419 static int copy_file_extent_holes(struct rb_root *dst,
420                                   struct rb_root *src)
421 {
422         struct file_extent_hole *hole;
423         struct rb_node *node;
424         int ret = 0;
425
426         node = rb_first(src);
427         while (node) {
428                 hole = rb_entry(node, struct file_extent_hole, node);
429                 ret = add_file_extent_hole(dst, hole->start, hole->len);
430                 if (ret)
431                         break;
432                 node = rb_next(node);
433         }
434         return ret;
435 }
436
437 static void free_file_extent_holes(struct rb_root *holes)
438 {
439         struct rb_node *node;
440         struct file_extent_hole *hole;
441
442         node = rb_first(holes);
443         while (node) {
444                 hole = rb_entry(node, struct file_extent_hole, node);
445                 rb_erase(node, holes);
446                 free(hole);
447                 node = rb_first(holes);
448         }
449 }
450
451 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
452
453 static void record_root_in_trans(struct btrfs_trans_handle *trans,
454                                  struct btrfs_root *root)
455 {
456         if (root->last_trans != trans->transid) {
457                 root->track_dirty = 1;
458                 root->last_trans = trans->transid;
459                 root->commit_root = root->node;
460                 extent_buffer_get(root->node);
461         }
462 }
463
464 static u8 imode_to_type(u32 imode)
465 {
466 #define S_SHIFT 12
467         static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
468                 [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
469                 [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
470                 [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
471                 [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
472                 [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
473                 [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
474                 [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
475         };
476
477         return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
478 #undef S_SHIFT
479 }
480
481 static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
482 {
483         struct device_record *rec1;
484         struct device_record *rec2;
485
486         rec1 = rb_entry(node1, struct device_record, node);
487         rec2 = rb_entry(node2, struct device_record, node);
488         if (rec1->devid > rec2->devid)
489                 return -1;
490         else if (rec1->devid < rec2->devid)
491                 return 1;
492         else
493                 return 0;
494 }
495
496 static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
497 {
498         struct inode_record *rec;
499         struct inode_backref *backref;
500         struct inode_backref *orig;
501         struct inode_backref *tmp;
502         struct orphan_data_extent *src_orphan;
503         struct orphan_data_extent *dst_orphan;
504         struct rb_node *rb;
505         size_t size;
506         int ret;
507
508         rec = malloc(sizeof(*rec));
509         if (!rec)
510                 return ERR_PTR(-ENOMEM);
511         memcpy(rec, orig_rec, sizeof(*rec));
512         rec->refs = 1;
513         INIT_LIST_HEAD(&rec->backrefs);
514         INIT_LIST_HEAD(&rec->orphan_extents);
515         rec->holes = RB_ROOT;
516
517         list_for_each_entry(orig, &orig_rec->backrefs, list) {
518                 size = sizeof(*orig) + orig->namelen + 1;
519                 backref = malloc(size);
520                 if (!backref) {
521                         ret = -ENOMEM;
522                         goto cleanup;
523                 }
524                 memcpy(backref, orig, size);
525                 list_add_tail(&backref->list, &rec->backrefs);
526         }
527         list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
528                 dst_orphan = malloc(sizeof(*dst_orphan));
529                 if (!dst_orphan) {
530                         ret = -ENOMEM;
531                         goto cleanup;
532                 }
533                 memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
534                 list_add_tail(&dst_orphan->list, &rec->orphan_extents);
535         }
536         ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
537         if (ret < 0)
538                 goto cleanup_rb;
539
540         return rec;
541
542 cleanup_rb:
543         rb = rb_first(&rec->holes);
544         while (rb) {
545                 struct file_extent_hole *hole;
546
547                 hole = rb_entry(rb, struct file_extent_hole, node);
548                 rb = rb_next(rb);
549                 free(hole);
550         }
551
552 cleanup:
553         if (!list_empty(&rec->backrefs))
554                 list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
555                         list_del(&orig->list);
556                         free(orig);
557                 }
558
559         if (!list_empty(&rec->orphan_extents))
560                 list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
561                         list_del(&orig->list);
562                         free(orig);
563                 }
564
565         free(rec);
566
567         return ERR_PTR(ret);
568 }
569
570 static void print_orphan_data_extents(struct list_head *orphan_extents,
571                                       u64 objectid)
572 {
573         struct orphan_data_extent *orphan;
574
575         if (list_empty(orphan_extents))
576                 return;
577         printf("The following data extent is lost in tree %llu:\n",
578                objectid);
579         list_for_each_entry(orphan, orphan_extents, list) {
580                 printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
581                        orphan->objectid, orphan->offset, orphan->disk_bytenr,
582                        orphan->disk_len);
583         }
584 }
585
586 static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
587 {
588         u64 root_objectid = root->root_key.objectid;
589         int errors = rec->errors;
590
591         if (!errors)
592                 return;
593         /* reloc root errors, we print its corresponding fs root objectid*/
594         if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
595                 root_objectid = root->root_key.offset;
596                 fprintf(stderr, "reloc");
597         }
598         fprintf(stderr, "root %llu inode %llu errors %x",
599                 (unsigned long long) root_objectid,
600                 (unsigned long long) rec->ino, rec->errors);
601
602         if (errors & I_ERR_NO_INODE_ITEM)
603                 fprintf(stderr, ", no inode item");
604         if (errors & I_ERR_NO_ORPHAN_ITEM)
605                 fprintf(stderr, ", no orphan item");
606         if (errors & I_ERR_DUP_INODE_ITEM)
607                 fprintf(stderr, ", dup inode item");
608         if (errors & I_ERR_DUP_DIR_INDEX)
609                 fprintf(stderr, ", dup dir index");
610         if (errors & I_ERR_ODD_DIR_ITEM)
611                 fprintf(stderr, ", odd dir item");
612         if (errors & I_ERR_ODD_FILE_EXTENT)
613                 fprintf(stderr, ", odd file extent");
614         if (errors & I_ERR_BAD_FILE_EXTENT)
615                 fprintf(stderr, ", bad file extent");
616         if (errors & I_ERR_FILE_EXTENT_OVERLAP)
617                 fprintf(stderr, ", file extent overlap");
618         if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
619                 fprintf(stderr, ", file extent discount");
620         if (errors & I_ERR_DIR_ISIZE_WRONG)
621                 fprintf(stderr, ", dir isize wrong");
622         if (errors & I_ERR_FILE_NBYTES_WRONG)
623                 fprintf(stderr, ", nbytes wrong");
624         if (errors & I_ERR_ODD_CSUM_ITEM)
625                 fprintf(stderr, ", odd csum item");
626         if (errors & I_ERR_SOME_CSUM_MISSING)
627                 fprintf(stderr, ", some csum missing");
628         if (errors & I_ERR_LINK_COUNT_WRONG)
629                 fprintf(stderr, ", link count wrong");
630         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
631                 fprintf(stderr, ", orphan file extent");
632         fprintf(stderr, "\n");
633         /* Print the orphan extents if needed */
634         if (errors & I_ERR_FILE_EXTENT_ORPHAN)
635                 print_orphan_data_extents(&rec->orphan_extents, root->objectid);
636
637         /* Print the holes if needed */
638         if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
639                 struct file_extent_hole *hole;
640                 struct rb_node *node;
641                 int found = 0;
642
643                 node = rb_first(&rec->holes);
644                 fprintf(stderr, "Found file extent holes:\n");
645                 while (node) {
646                         found = 1;
647                         hole = rb_entry(node, struct file_extent_hole, node);
648                         fprintf(stderr, "\tstart: %llu, len: %llu\n",
649                                 hole->start, hole->len);
650                         node = rb_next(node);
651                 }
652                 if (!found)
653                         fprintf(stderr, "\tstart: 0, len: %llu\n",
654                                 round_up(rec->isize,
655                                          root->fs_info->sectorsize));
656         }
657 }
658
659 static void print_ref_error(int errors)
660 {
661         if (errors & REF_ERR_NO_DIR_ITEM)
662                 fprintf(stderr, ", no dir item");
663         if (errors & REF_ERR_NO_DIR_INDEX)
664                 fprintf(stderr, ", no dir index");
665         if (errors & REF_ERR_NO_INODE_REF)
666                 fprintf(stderr, ", no inode ref");
667         if (errors & REF_ERR_DUP_DIR_ITEM)
668                 fprintf(stderr, ", dup dir item");
669         if (errors & REF_ERR_DUP_DIR_INDEX)
670                 fprintf(stderr, ", dup dir index");
671         if (errors & REF_ERR_DUP_INODE_REF)
672                 fprintf(stderr, ", dup inode ref");
673         if (errors & REF_ERR_INDEX_UNMATCH)
674                 fprintf(stderr, ", index mismatch");
675         if (errors & REF_ERR_FILETYPE_UNMATCH)
676                 fprintf(stderr, ", filetype mismatch");
677         if (errors & REF_ERR_NAME_TOO_LONG)
678                 fprintf(stderr, ", name too long");
679         if (errors & REF_ERR_NO_ROOT_REF)
680                 fprintf(stderr, ", no root ref");
681         if (errors & REF_ERR_NO_ROOT_BACKREF)
682                 fprintf(stderr, ", no root backref");
683         if (errors & REF_ERR_DUP_ROOT_REF)
684                 fprintf(stderr, ", dup root ref");
685         if (errors & REF_ERR_DUP_ROOT_BACKREF)
686                 fprintf(stderr, ", dup root backref");
687         fprintf(stderr, "\n");
688 }
689
690 static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
691                                           u64 ino, int mod)
692 {
693         struct ptr_node *node;
694         struct cache_extent *cache;
695         struct inode_record *rec = NULL;
696         int ret;
697
698         cache = lookup_cache_extent(inode_cache, ino, 1);
699         if (cache) {
700                 node = container_of(cache, struct ptr_node, cache);
701                 rec = node->data;
702                 if (mod && rec->refs > 1) {
703                         node->data = clone_inode_rec(rec);
704                         if (IS_ERR(node->data))
705                                 return node->data;
706                         rec->refs--;
707                         rec = node->data;
708                 }
709         } else if (mod) {
710                 rec = calloc(1, sizeof(*rec));
711                 if (!rec)
712                         return ERR_PTR(-ENOMEM);
713                 rec->ino = ino;
714                 rec->extent_start = (u64)-1;
715                 rec->refs = 1;
716                 INIT_LIST_HEAD(&rec->backrefs);
717                 INIT_LIST_HEAD(&rec->orphan_extents);
718                 rec->holes = RB_ROOT;
719
720                 node = malloc(sizeof(*node));
721                 if (!node) {
722                         free(rec);
723                         return ERR_PTR(-ENOMEM);
724                 }
725                 node->cache.start = ino;
726                 node->cache.size = 1;
727                 node->data = rec;
728
729                 if (ino == BTRFS_FREE_INO_OBJECTID)
730                         rec->found_link = 1;
731
732                 ret = insert_cache_extent(inode_cache, &node->cache);
733                 if (ret)
734                         return ERR_PTR(-EEXIST);
735         }
736         return rec;
737 }
738
739 static void free_orphan_data_extents(struct list_head *orphan_extents)
740 {
741         struct orphan_data_extent *orphan;
742
743         while (!list_empty(orphan_extents)) {
744                 orphan = list_entry(orphan_extents->next,
745                                     struct orphan_data_extent, list);
746                 list_del(&orphan->list);
747                 free(orphan);
748         }
749 }
750
751 static void free_inode_rec(struct inode_record *rec)
752 {
753         struct inode_backref *backref;
754
755         if (--rec->refs > 0)
756                 return;
757
758         while (!list_empty(&rec->backrefs)) {
759                 backref = to_inode_backref(rec->backrefs.next);
760                 list_del(&backref->list);
761                 free(backref);
762         }
763         free_orphan_data_extents(&rec->orphan_extents);
764         free_file_extent_holes(&rec->holes);
765         free(rec);
766 }
767
768 static int can_free_inode_rec(struct inode_record *rec)
769 {
770         if (!rec->errors && rec->checked && rec->found_inode_item &&
771             rec->nlink == rec->found_link && list_empty(&rec->backrefs))
772                 return 1;
773         return 0;
774 }
775
776 static void maybe_free_inode_rec(struct cache_tree *inode_cache,
777                                  struct inode_record *rec)
778 {
779         struct cache_extent *cache;
780         struct inode_backref *tmp, *backref;
781         struct ptr_node *node;
782         u8 filetype;
783
784         if (!rec->found_inode_item)
785                 return;
786
787         filetype = imode_to_type(rec->imode);
788         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
789                 if (backref->found_dir_item && backref->found_dir_index) {
790                         if (backref->filetype != filetype)
791                                 backref->errors |= REF_ERR_FILETYPE_UNMATCH;
792                         if (!backref->errors && backref->found_inode_ref &&
793                             rec->nlink == rec->found_link) {
794                                 list_del(&backref->list);
795                                 free(backref);
796                         }
797                 }
798         }
799
800         if (!rec->checked || rec->merging)
801                 return;
802
803         if (S_ISDIR(rec->imode)) {
804                 if (rec->found_size != rec->isize)
805                         rec->errors |= I_ERR_DIR_ISIZE_WRONG;
806                 if (rec->found_file_extent)
807                         rec->errors |= I_ERR_ODD_FILE_EXTENT;
808         } else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
809                 if (rec->found_dir_item)
810                         rec->errors |= I_ERR_ODD_DIR_ITEM;
811                 if (rec->found_size != rec->nbytes)
812                         rec->errors |= I_ERR_FILE_NBYTES_WRONG;
813                 if (rec->nlink > 0 && !no_holes &&
814                     (rec->extent_end < rec->isize ||
815                      first_extent_gap(&rec->holes) < rec->isize))
816                         rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
817         }
818
819         if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
820                 if (rec->found_csum_item && rec->nodatasum)
821                         rec->errors |= I_ERR_ODD_CSUM_ITEM;
822                 if (rec->some_csum_missing && !rec->nodatasum)
823                         rec->errors |= I_ERR_SOME_CSUM_MISSING;
824         }
825
826         BUG_ON(rec->refs != 1);
827         if (can_free_inode_rec(rec)) {
828                 cache = lookup_cache_extent(inode_cache, rec->ino, 1);
829                 node = container_of(cache, struct ptr_node, cache);
830                 BUG_ON(node->data != rec);
831                 remove_cache_extent(inode_cache, &node->cache);
832                 free(node);
833                 free_inode_rec(rec);
834         }
835 }
836
837 static int check_orphan_item(struct btrfs_root *root, u64 ino)
838 {
839         struct btrfs_path path;
840         struct btrfs_key key;
841         int ret;
842
843         key.objectid = BTRFS_ORPHAN_OBJECTID;
844         key.type = BTRFS_ORPHAN_ITEM_KEY;
845         key.offset = ino;
846
847         btrfs_init_path(&path);
848         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
849         btrfs_release_path(&path);
850         if (ret > 0)
851                 ret = -ENOENT;
852         return ret;
853 }
854
855 static int process_inode_item(struct extent_buffer *eb,
856                               int slot, struct btrfs_key *key,
857                               struct shared_node *active_node)
858 {
859         struct inode_record *rec;
860         struct btrfs_inode_item *item;
861
862         rec = active_node->current;
863         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
864         if (rec->found_inode_item) {
865                 rec->errors |= I_ERR_DUP_INODE_ITEM;
866                 return 1;
867         }
868         item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
869         rec->nlink = btrfs_inode_nlink(eb, item);
870         rec->isize = btrfs_inode_size(eb, item);
871         rec->nbytes = btrfs_inode_nbytes(eb, item);
872         rec->imode = btrfs_inode_mode(eb, item);
873         if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
874                 rec->nodatasum = 1;
875         rec->found_inode_item = 1;
876         if (rec->nlink == 0)
877                 rec->errors |= I_ERR_NO_ORPHAN_ITEM;
878         maybe_free_inode_rec(&active_node->inode_cache, rec);
879         return 0;
880 }
881
882 static struct inode_backref *get_inode_backref(struct inode_record *rec,
883                                                 const char *name,
884                                                 int namelen, u64 dir)
885 {
886         struct inode_backref *backref;
887
888         list_for_each_entry(backref, &rec->backrefs, list) {
889                 if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
890                         break;
891                 if (backref->dir != dir || backref->namelen != namelen)
892                         continue;
893                 if (memcmp(name, backref->name, namelen))
894                         continue;
895                 return backref;
896         }
897
898         backref = malloc(sizeof(*backref) + namelen + 1);
899         if (!backref)
900                 return NULL;
901         memset(backref, 0, sizeof(*backref));
902         backref->dir = dir;
903         backref->namelen = namelen;
904         memcpy(backref->name, name, namelen);
905         backref->name[namelen] = '\0';
906         list_add_tail(&backref->list, &rec->backrefs);
907         return backref;
908 }
909
910 static int add_inode_backref(struct cache_tree *inode_cache,
911                              u64 ino, u64 dir, u64 index,
912                              const char *name, int namelen,
913                              u8 filetype, u8 itemtype, int errors)
914 {
915         struct inode_record *rec;
916         struct inode_backref *backref;
917
918         rec = get_inode_rec(inode_cache, ino, 1);
919         BUG_ON(IS_ERR(rec));
920         backref = get_inode_backref(rec, name, namelen, dir);
921         BUG_ON(!backref);
922         if (errors)
923                 backref->errors |= errors;
924         if (itemtype == BTRFS_DIR_INDEX_KEY) {
925                 if (backref->found_dir_index)
926                         backref->errors |= REF_ERR_DUP_DIR_INDEX;
927                 if (backref->found_inode_ref && backref->index != index)
928                         backref->errors |= REF_ERR_INDEX_UNMATCH;
929                 if (backref->found_dir_item && backref->filetype != filetype)
930                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
931
932                 backref->index = index;
933                 backref->filetype = filetype;
934                 backref->found_dir_index = 1;
935         } else if (itemtype == BTRFS_DIR_ITEM_KEY) {
936                 rec->found_link++;
937                 if (backref->found_dir_item)
938                         backref->errors |= REF_ERR_DUP_DIR_ITEM;
939                 if (backref->found_dir_index && backref->filetype != filetype)
940                         backref->errors |= REF_ERR_FILETYPE_UNMATCH;
941
942                 backref->filetype = filetype;
943                 backref->found_dir_item = 1;
944         } else if ((itemtype == BTRFS_INODE_REF_KEY) ||
945                    (itemtype == BTRFS_INODE_EXTREF_KEY)) {
946                 if (backref->found_inode_ref)
947                         backref->errors |= REF_ERR_DUP_INODE_REF;
948                 if (backref->found_dir_index && backref->index != index)
949                         backref->errors |= REF_ERR_INDEX_UNMATCH;
950                 else
951                         backref->index = index;
952
953                 backref->ref_type = itemtype;
954                 backref->found_inode_ref = 1;
955         } else {
956                 BUG_ON(1);
957         }
958
959         maybe_free_inode_rec(inode_cache, rec);
960         return 0;
961 }
962
963 static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
964                             struct cache_tree *dst_cache)
965 {
966         struct inode_backref *backref;
967         u32 dir_count = 0;
968         int ret = 0;
969
970         dst->merging = 1;
971         list_for_each_entry(backref, &src->backrefs, list) {
972                 if (backref->found_dir_index) {
973                         add_inode_backref(dst_cache, dst->ino, backref->dir,
974                                         backref->index, backref->name,
975                                         backref->namelen, backref->filetype,
976                                         BTRFS_DIR_INDEX_KEY, backref->errors);
977                 }
978                 if (backref->found_dir_item) {
979                         dir_count++;
980                         add_inode_backref(dst_cache, dst->ino,
981                                         backref->dir, 0, backref->name,
982                                         backref->namelen, backref->filetype,
983                                         BTRFS_DIR_ITEM_KEY, backref->errors);
984                 }
985                 if (backref->found_inode_ref) {
986                         add_inode_backref(dst_cache, dst->ino,
987                                         backref->dir, backref->index,
988                                         backref->name, backref->namelen, 0,
989                                         backref->ref_type, backref->errors);
990                 }
991         }
992
993         if (src->found_dir_item)
994                 dst->found_dir_item = 1;
995         if (src->found_file_extent)
996                 dst->found_file_extent = 1;
997         if (src->found_csum_item)
998                 dst->found_csum_item = 1;
999         if (src->some_csum_missing)
1000                 dst->some_csum_missing = 1;
1001         if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
1002                 ret = copy_file_extent_holes(&dst->holes, &src->holes);
1003                 if (ret < 0)
1004                         return ret;
1005         }
1006
1007         BUG_ON(src->found_link < dir_count);
1008         dst->found_link += src->found_link - dir_count;
1009         dst->found_size += src->found_size;
1010         if (src->extent_start != (u64)-1) {
1011                 if (dst->extent_start == (u64)-1) {
1012                         dst->extent_start = src->extent_start;
1013                         dst->extent_end = src->extent_end;
1014                 } else {
1015                         if (dst->extent_end > src->extent_start)
1016                                 dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1017                         else if (dst->extent_end < src->extent_start) {
1018                                 ret = add_file_extent_hole(&dst->holes,
1019                                         dst->extent_end,
1020                                         src->extent_start - dst->extent_end);
1021                         }
1022                         if (dst->extent_end < src->extent_end)
1023                                 dst->extent_end = src->extent_end;
1024                 }
1025         }
1026
1027         dst->errors |= src->errors;
1028         if (src->found_inode_item) {
1029                 if (!dst->found_inode_item) {
1030                         dst->nlink = src->nlink;
1031                         dst->isize = src->isize;
1032                         dst->nbytes = src->nbytes;
1033                         dst->imode = src->imode;
1034                         dst->nodatasum = src->nodatasum;
1035                         dst->found_inode_item = 1;
1036                 } else {
1037                         dst->errors |= I_ERR_DUP_INODE_ITEM;
1038                 }
1039         }
1040         dst->merging = 0;
1041
1042         return 0;
1043 }
1044
1045 static int splice_shared_node(struct shared_node *src_node,
1046                               struct shared_node *dst_node)
1047 {
1048         struct cache_extent *cache;
1049         struct ptr_node *node, *ins;
1050         struct cache_tree *src, *dst;
1051         struct inode_record *rec, *conflict;
1052         u64 current_ino = 0;
1053         int splice = 0;
1054         int ret;
1055
1056         if (--src_node->refs == 0)
1057                 splice = 1;
1058         if (src_node->current)
1059                 current_ino = src_node->current->ino;
1060
1061         src = &src_node->root_cache;
1062         dst = &dst_node->root_cache;
1063 again:
1064         cache = search_cache_extent(src, 0);
1065         while (cache) {
1066                 node = container_of(cache, struct ptr_node, cache);
1067                 rec = node->data;
1068                 cache = next_cache_extent(cache);
1069
1070                 if (splice) {
1071                         remove_cache_extent(src, &node->cache);
1072                         ins = node;
1073                 } else {
1074                         ins = malloc(sizeof(*ins));
1075                         BUG_ON(!ins);
1076                         ins->cache.start = node->cache.start;
1077                         ins->cache.size = node->cache.size;
1078                         ins->data = rec;
1079                         rec->refs++;
1080                 }
1081                 ret = insert_cache_extent(dst, &ins->cache);
1082                 if (ret == -EEXIST) {
1083                         conflict = get_inode_rec(dst, rec->ino, 1);
1084                         BUG_ON(IS_ERR(conflict));
1085                         merge_inode_recs(rec, conflict, dst);
1086                         if (rec->checked) {
1087                                 conflict->checked = 1;
1088                                 if (dst_node->current == conflict)
1089                                         dst_node->current = NULL;
1090                         }
1091                         maybe_free_inode_rec(dst, conflict);
1092                         free_inode_rec(rec);
1093                         free(ins);
1094                 } else {
1095                         BUG_ON(ret);
1096                 }
1097         }
1098
1099         if (src == &src_node->root_cache) {
1100                 src = &src_node->inode_cache;
1101                 dst = &dst_node->inode_cache;
1102                 goto again;
1103         }
1104
1105         if (current_ino > 0 && (!dst_node->current ||
1106             current_ino > dst_node->current->ino)) {
1107                 if (dst_node->current) {
1108                         dst_node->current->checked = 1;
1109                         maybe_free_inode_rec(dst, dst_node->current);
1110                 }
1111                 dst_node->current = get_inode_rec(dst, current_ino, 1);
1112                 BUG_ON(IS_ERR(dst_node->current));
1113         }
1114         return 0;
1115 }
1116
1117 static void free_inode_ptr(struct cache_extent *cache)
1118 {
1119         struct ptr_node *node;
1120         struct inode_record *rec;
1121
1122         node = container_of(cache, struct ptr_node, cache);
1123         rec = node->data;
1124         free_inode_rec(rec);
1125         free(node);
1126 }
1127
1128 FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
1129
1130 static struct shared_node *find_shared_node(struct cache_tree *shared,
1131                                             u64 bytenr)
1132 {
1133         struct cache_extent *cache;
1134         struct shared_node *node;
1135
1136         cache = lookup_cache_extent(shared, bytenr, 1);
1137         if (cache) {
1138                 node = container_of(cache, struct shared_node, cache);
1139                 return node;
1140         }
1141         return NULL;
1142 }
1143
1144 static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
1145 {
1146         int ret;
1147         struct shared_node *node;
1148
1149         node = calloc(1, sizeof(*node));
1150         if (!node)
1151                 return -ENOMEM;
1152         node->cache.start = bytenr;
1153         node->cache.size = 1;
1154         cache_tree_init(&node->root_cache);
1155         cache_tree_init(&node->inode_cache);
1156         node->refs = refs;
1157
1158         ret = insert_cache_extent(shared, &node->cache);
1159
1160         return ret;
1161 }
1162
1163 static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
1164                              struct walk_control *wc, int level)
1165 {
1166         struct shared_node *node;
1167         struct shared_node *dest;
1168         int ret;
1169
1170         if (level == wc->active_node)
1171                 return 0;
1172
1173         BUG_ON(wc->active_node <= level);
1174         node = find_shared_node(&wc->shared, bytenr);
1175         if (!node) {
1176                 ret = add_shared_node(&wc->shared, bytenr, refs);
1177                 BUG_ON(ret);
1178                 node = find_shared_node(&wc->shared, bytenr);
1179                 wc->nodes[level] = node;
1180                 wc->active_node = level;
1181                 return 0;
1182         }
1183
1184         if (wc->root_level == wc->active_node &&
1185             btrfs_root_refs(&root->root_item) == 0) {
1186                 if (--node->refs == 0) {
1187                         free_inode_recs_tree(&node->root_cache);
1188                         free_inode_recs_tree(&node->inode_cache);
1189                         remove_cache_extent(&wc->shared, &node->cache);
1190                         free(node);
1191                 }
1192                 return 1;
1193         }
1194
1195         dest = wc->nodes[wc->active_node];
1196         splice_shared_node(node, dest);
1197         if (node->refs == 0) {
1198                 remove_cache_extent(&wc->shared, &node->cache);
1199                 free(node);
1200         }
1201         return 1;
1202 }
1203
1204 static int leave_shared_node(struct btrfs_root *root,
1205                              struct walk_control *wc, int level)
1206 {
1207         struct shared_node *node;
1208         struct shared_node *dest;
1209         int i;
1210
1211         if (level == wc->root_level)
1212                 return 0;
1213
1214         for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
1215                 if (wc->nodes[i])
1216                         break;
1217         }
1218         BUG_ON(i >= BTRFS_MAX_LEVEL);
1219
1220         node = wc->nodes[wc->active_node];
1221         wc->nodes[wc->active_node] = NULL;
1222         wc->active_node = i;
1223
1224         dest = wc->nodes[wc->active_node];
1225         if (wc->active_node < wc->root_level ||
1226             btrfs_root_refs(&root->root_item) > 0) {
1227                 BUG_ON(node->refs <= 1);
1228                 splice_shared_node(node, dest);
1229         } else {
1230                 BUG_ON(node->refs < 2);
1231                 node->refs--;
1232         }
1233         return 0;
1234 }
1235
1236 /*
1237  * Returns:
1238  * < 0 - on error
1239  * 1   - if the root with id child_root_id is a child of root parent_root_id
1240  * 0   - if the root child_root_id isn't a child of the root parent_root_id but
1241  *       has other root(s) as parent(s)
1242  * 2   - if the root child_root_id doesn't have any parent roots
1243  */
1244 static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
1245                          u64 child_root_id)
1246 {
1247         struct btrfs_path path;
1248         struct btrfs_key key;
1249         struct extent_buffer *leaf;
1250         int has_parent = 0;
1251         int ret;
1252
1253         btrfs_init_path(&path);
1254
1255         key.objectid = parent_root_id;
1256         key.type = BTRFS_ROOT_REF_KEY;
1257         key.offset = child_root_id;
1258         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1259                                 0, 0);
1260         if (ret < 0)
1261                 return ret;
1262         btrfs_release_path(&path);
1263         if (!ret)
1264                 return 1;
1265
1266         key.objectid = child_root_id;
1267         key.type = BTRFS_ROOT_BACKREF_KEY;
1268         key.offset = 0;
1269         ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
1270                                 0, 0);
1271         if (ret < 0)
1272                 goto out;
1273
1274         while (1) {
1275                 leaf = path.nodes[0];
1276                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1277                         ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
1278                         if (ret)
1279                                 break;
1280                         leaf = path.nodes[0];
1281                 }
1282
1283                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1284                 if (key.objectid != child_root_id ||
1285                     key.type != BTRFS_ROOT_BACKREF_KEY)
1286                         break;
1287
1288                 has_parent = 1;
1289
1290                 if (key.offset == parent_root_id) {
1291                         btrfs_release_path(&path);
1292                         return 1;
1293                 }
1294
1295                 path.slots[0]++;
1296         }
1297 out:
1298         btrfs_release_path(&path);
1299         if (ret < 0)
1300                 return ret;
1301         return has_parent ? 0 : 2;
1302 }
1303
1304 static int process_dir_item(struct extent_buffer *eb,
1305                             int slot, struct btrfs_key *key,
1306                             struct shared_node *active_node)
1307 {
1308         u32 total;
1309         u32 cur = 0;
1310         u32 len;
1311         u32 name_len;
1312         u32 data_len;
1313         int error;
1314         int nritems = 0;
1315         u8 filetype;
1316         struct btrfs_dir_item *di;
1317         struct inode_record *rec;
1318         struct cache_tree *root_cache;
1319         struct cache_tree *inode_cache;
1320         struct btrfs_key location;
1321         char namebuf[BTRFS_NAME_LEN];
1322
1323         root_cache = &active_node->root_cache;
1324         inode_cache = &active_node->inode_cache;
1325         rec = active_node->current;
1326         rec->found_dir_item = 1;
1327
1328         di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
1329         total = btrfs_item_size_nr(eb, slot);
1330         while (cur < total) {
1331                 nritems++;
1332                 btrfs_dir_item_key_to_cpu(eb, di, &location);
1333                 name_len = btrfs_dir_name_len(eb, di);
1334                 data_len = btrfs_dir_data_len(eb, di);
1335                 filetype = btrfs_dir_type(eb, di);
1336
1337                 rec->found_size += name_len;
1338                 if (cur + sizeof(*di) + name_len > total ||
1339                     name_len > BTRFS_NAME_LEN) {
1340                         error = REF_ERR_NAME_TOO_LONG;
1341
1342                         if (cur + sizeof(*di) > total)
1343                                 break;
1344                         len = min_t(u32, total - cur - sizeof(*di),
1345                                     BTRFS_NAME_LEN);
1346                 } else {
1347                         len = name_len;
1348                         error = 0;
1349                 }
1350
1351                 read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
1352
1353                 if (key->type == BTRFS_DIR_ITEM_KEY &&
1354                     key->offset != btrfs_name_hash(namebuf, len)) {
1355                         rec->errors |= I_ERR_ODD_DIR_ITEM;
1356                         error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
1357                         key->objectid, key->offset, namebuf, len, filetype,
1358                         key->offset, btrfs_name_hash(namebuf, len));
1359                 }
1360
1361                 if (location.type == BTRFS_INODE_ITEM_KEY) {
1362                         add_inode_backref(inode_cache, location.objectid,
1363                                           key->objectid, key->offset, namebuf,
1364                                           len, filetype, key->type, error);
1365                 } else if (location.type == BTRFS_ROOT_ITEM_KEY) {
1366                         add_inode_backref(root_cache, location.objectid,
1367                                           key->objectid, key->offset,
1368                                           namebuf, len, filetype,
1369                                           key->type, error);
1370                 } else {
1371                         fprintf(stderr,
1372                                 "unknown location type %d in DIR_ITEM[%llu %llu]\n",
1373                                 location.type, key->objectid, key->offset);
1374                         add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
1375                                           key->objectid, key->offset, namebuf,
1376                                           len, filetype, key->type, error);
1377                 }
1378
1379                 len = sizeof(*di) + name_len + data_len;
1380                 di = (struct btrfs_dir_item *)((char *)di + len);
1381                 cur += len;
1382         }
1383         if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
1384                 rec->errors |= I_ERR_DUP_DIR_INDEX;
1385
1386         return 0;
1387 }
1388
1389 static int process_inode_ref(struct extent_buffer *eb,
1390                              int slot, struct btrfs_key *key,
1391                              struct shared_node *active_node)
1392 {
1393         u32 total;
1394         u32 cur = 0;
1395         u32 len;
1396         u32 name_len;
1397         u64 index;
1398         int error;
1399         struct cache_tree *inode_cache;
1400         struct btrfs_inode_ref *ref;
1401         char namebuf[BTRFS_NAME_LEN];
1402
1403         inode_cache = &active_node->inode_cache;
1404
1405         ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
1406         total = btrfs_item_size_nr(eb, slot);
1407         while (cur < total) {
1408                 name_len = btrfs_inode_ref_name_len(eb, ref);
1409                 index = btrfs_inode_ref_index(eb, ref);
1410
1411                 /* inode_ref + namelen should not cross item boundary */
1412                 if (cur + sizeof(*ref) + name_len > total ||
1413                     name_len > BTRFS_NAME_LEN) {
1414                         if (total < cur + sizeof(*ref))
1415                                 break;
1416
1417                         /* Still try to read out the remaining part */
1418                         len = min_t(u32, total - cur - sizeof(*ref),
1419                                     BTRFS_NAME_LEN);
1420                         error = REF_ERR_NAME_TOO_LONG;
1421                 } else {
1422                         len = name_len;
1423                         error = 0;
1424                 }
1425
1426                 read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
1427                 add_inode_backref(inode_cache, key->objectid, key->offset,
1428                                   index, namebuf, len, 0, key->type, error);
1429
1430                 len = sizeof(*ref) + name_len;
1431                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
1432                 cur += len;
1433         }
1434         return 0;
1435 }
1436
1437 static int process_inode_extref(struct extent_buffer *eb,
1438                                 int slot, struct btrfs_key *key,
1439                                 struct shared_node *active_node)
1440 {
1441         u32 total;
1442         u32 cur = 0;
1443         u32 len;
1444         u32 name_len;
1445         u64 index;
1446         u64 parent;
1447         int error;
1448         struct cache_tree *inode_cache;
1449         struct btrfs_inode_extref *extref;
1450         char namebuf[BTRFS_NAME_LEN];
1451
1452         inode_cache = &active_node->inode_cache;
1453
1454         extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
1455         total = btrfs_item_size_nr(eb, slot);
1456         while (cur < total) {
1457                 name_len = btrfs_inode_extref_name_len(eb, extref);
1458                 index = btrfs_inode_extref_index(eb, extref);
1459                 parent = btrfs_inode_extref_parent(eb, extref);
1460                 if (name_len <= BTRFS_NAME_LEN) {
1461                         len = name_len;
1462                         error = 0;
1463                 } else {
1464                         len = BTRFS_NAME_LEN;
1465                         error = REF_ERR_NAME_TOO_LONG;
1466                 }
1467                 read_extent_buffer(eb, namebuf,
1468                                    (unsigned long)(extref + 1), len);
1469                 add_inode_backref(inode_cache, key->objectid, parent,
1470                                   index, namebuf, len, 0, key->type, error);
1471
1472                 len = sizeof(*extref) + name_len;
1473                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
1474                 cur += len;
1475         }
1476         return 0;
1477
1478 }
1479
1480 static int count_csum_range(struct btrfs_root *root, u64 start,
1481                             u64 len, u64 *found)
1482 {
1483         struct btrfs_key key;
1484         struct btrfs_path path;
1485         struct extent_buffer *leaf;
1486         int ret;
1487         size_t size;
1488         *found = 0;
1489         u64 csum_end;
1490         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
1491
1492         btrfs_init_path(&path);
1493
1494         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1495         key.offset = start;
1496         key.type = BTRFS_EXTENT_CSUM_KEY;
1497
1498         ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
1499                                 &key, &path, 0, 0);
1500         if (ret < 0)
1501                 goto out;
1502         if (ret > 0 && path.slots[0] > 0) {
1503                 leaf = path.nodes[0];
1504                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
1505                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
1506                     key.type == BTRFS_EXTENT_CSUM_KEY)
1507                         path.slots[0]--;
1508         }
1509
1510         while (len > 0) {
1511                 leaf = path.nodes[0];
1512                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
1513                         ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
1514                         if (ret > 0)
1515                                 break;
1516                         else if (ret < 0)
1517                                 goto out;
1518                         leaf = path.nodes[0];
1519                 }
1520
1521                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1522                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
1523                     key.type != BTRFS_EXTENT_CSUM_KEY)
1524                         break;
1525
1526                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
1527                 if (key.offset >= start + len)
1528                         break;
1529
1530                 if (key.offset > start)
1531                         start = key.offset;
1532
1533                 size = btrfs_item_size_nr(leaf, path.slots[0]);
1534                 csum_end = key.offset + (size / csum_size) *
1535                            root->fs_info->sectorsize;
1536                 if (csum_end > start) {
1537                         size = min(csum_end - start, len);
1538                         len -= size;
1539                         start += size;
1540                         *found += size;
1541                 }
1542
1543                 path.slots[0]++;
1544         }
1545 out:
1546         btrfs_release_path(&path);
1547         if (ret < 0)
1548                 return ret;
1549         return 0;
1550 }
1551
1552 static int process_file_extent(struct btrfs_root *root,
1553                                 struct extent_buffer *eb,
1554                                 int slot, struct btrfs_key *key,
1555                                 struct shared_node *active_node)
1556 {
1557         struct inode_record *rec;
1558         struct btrfs_file_extent_item *fi;
1559         u64 num_bytes = 0;
1560         u64 disk_bytenr = 0;
1561         u64 extent_offset = 0;
1562         u64 mask = root->fs_info->sectorsize - 1;
1563         int extent_type;
1564         int ret;
1565
1566         rec = active_node->current;
1567         BUG_ON(rec->ino != key->objectid || rec->refs > 1);
1568         rec->found_file_extent = 1;
1569
1570         if (rec->extent_start == (u64)-1) {
1571                 rec->extent_start = key->offset;
1572                 rec->extent_end = key->offset;
1573         }
1574
1575         if (rec->extent_end > key->offset)
1576                 rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
1577         else if (rec->extent_end < key->offset) {
1578                 ret = add_file_extent_hole(&rec->holes, rec->extent_end,
1579                                            key->offset - rec->extent_end);
1580                 if (ret < 0)
1581                         return ret;
1582         }
1583
1584         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
1585         extent_type = btrfs_file_extent_type(eb, fi);
1586
1587         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1588                 num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
1589                 if (num_bytes == 0)
1590                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1591                 rec->found_size += num_bytes;
1592                 num_bytes = (num_bytes + mask) & ~mask;
1593         } else if (extent_type == BTRFS_FILE_EXTENT_REG ||
1594                    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1595                 num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1596                 disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1597                 extent_offset = btrfs_file_extent_offset(eb, fi);
1598                 if (num_bytes == 0 || (num_bytes & mask))
1599                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1600                 if (num_bytes + extent_offset >
1601                     btrfs_file_extent_ram_bytes(eb, fi))
1602                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1603                 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
1604                     (btrfs_file_extent_compression(eb, fi) ||
1605                      btrfs_file_extent_encryption(eb, fi) ||
1606                      btrfs_file_extent_other_encoding(eb, fi)))
1607                         rec->errors |= I_ERR_BAD_FILE_EXTENT;
1608                 if (disk_bytenr > 0)
1609                         rec->found_size += num_bytes;
1610         } else {
1611                 rec->errors |= I_ERR_BAD_FILE_EXTENT;
1612         }
1613         rec->extent_end = key->offset + num_bytes;
1614
1615         /*
1616          * The data reloc tree will copy full extents into its inode and then
1617          * copy the corresponding csums.  Because the extent it copied could be
1618          * a preallocated extent that hasn't been written to yet there may be no
1619          * csums to copy, ergo we won't have csums for our file extent.  This is
1620          * ok so just don't bother checking csums if the inode belongs to the
1621          * data reloc tree.
1622          */
1623         if (disk_bytenr > 0 &&
1624             btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
1625                 u64 found;
1626                 if (btrfs_file_extent_compression(eb, fi))
1627                         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1628                 else
1629                         disk_bytenr += extent_offset;
1630
1631                 ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
1632                 if (ret < 0)
1633                         return ret;
1634                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
1635                         if (found > 0)
1636                                 rec->found_csum_item = 1;
1637                         if (found < num_bytes)
1638                                 rec->some_csum_missing = 1;
1639                 } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1640                         if (found > 0)
1641                                 rec->errors |= I_ERR_ODD_CSUM_ITEM;
1642                 }
1643         }
1644         return 0;
1645 }
1646
1647 static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
1648                             struct walk_control *wc)
1649 {
1650         struct btrfs_key key;
1651         u32 nritems;
1652         int i;
1653         int ret = 0;
1654         struct cache_tree *inode_cache;
1655         struct shared_node *active_node;
1656
1657         if (wc->root_level == wc->active_node &&
1658             btrfs_root_refs(&root->root_item) == 0)
1659                 return 0;
1660
1661         active_node = wc->nodes[wc->active_node];
1662         inode_cache = &active_node->inode_cache;
1663         nritems = btrfs_header_nritems(eb);
1664         for (i = 0; i < nritems; i++) {
1665                 btrfs_item_key_to_cpu(eb, &key, i);
1666
1667                 if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
1668                         continue;
1669                 if (key.type == BTRFS_ORPHAN_ITEM_KEY)
1670                         continue;
1671
1672                 if (active_node->current == NULL ||
1673                     active_node->current->ino < key.objectid) {
1674                         if (active_node->current) {
1675                                 active_node->current->checked = 1;
1676                                 maybe_free_inode_rec(inode_cache,
1677                                                      active_node->current);
1678                         }
1679                         active_node->current = get_inode_rec(inode_cache,
1680                                                              key.objectid, 1);
1681                         BUG_ON(IS_ERR(active_node->current));
1682                 }
1683                 switch (key.type) {
1684                 case BTRFS_DIR_ITEM_KEY:
1685                 case BTRFS_DIR_INDEX_KEY:
1686                         ret = process_dir_item(eb, i, &key, active_node);
1687                         break;
1688                 case BTRFS_INODE_REF_KEY:
1689                         ret = process_inode_ref(eb, i, &key, active_node);
1690                         break;
1691                 case BTRFS_INODE_EXTREF_KEY:
1692                         ret = process_inode_extref(eb, i, &key, active_node);
1693                         break;
1694                 case BTRFS_INODE_ITEM_KEY:
1695                         ret = process_inode_item(eb, i, &key, active_node);
1696                         break;
1697                 case BTRFS_EXTENT_DATA_KEY:
1698                         ret = process_file_extent(root, eb, i, &key,
1699                                                   active_node);
1700                         break;
1701                 default:
1702                         break;
1703                 };
1704         }
1705         return ret;
1706 }
1707
/*
 * Per-level cache of tree block reference information, indexed by tree
 * level.  The entries are filled in by update_nodes_refs().
 */
struct node_refs {
	/* bytenr of the tree block last recorded for each level */
	u64 bytenr[BTRFS_MAX_LEVEL];
	/* reference count looked up for the recorded tree block */
	u64 refs[BTRFS_MAX_LEVEL];
	/* non-zero when the recorded tree block has to be checked in this root */
	int need_check[BTRFS_MAX_LEVEL];
	/* field for checking all trees */
	int checked[BTRFS_MAX_LEVEL];
	/* the corresponding extent should be marked as full backref or not */
	int full_backref[BTRFS_MAX_LEVEL];
};
1717
1718 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
1719                              struct extent_buffer *eb, struct node_refs *nrefs,
1720                              u64 level, int check_all);
1721 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
1722                             unsigned int ext_ref);
1723
1724 /*
1725  * Returns >0  Found error, not fatal, should continue
1726  * Returns <0  Fatal error, must exit the whole check
1727  * Returns 0   No errors found
1728  */
1729 static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
1730                                struct node_refs *nrefs, int *level, int ext_ref)
1731 {
1732         struct extent_buffer *cur = path->nodes[0];
1733         struct btrfs_key key;
1734         u64 cur_bytenr;
1735         u32 nritems;
1736         u64 first_ino = 0;
1737         int root_level = btrfs_header_level(root->node);
1738         int i;
1739         int ret = 0; /* Final return value */
1740         int err = 0; /* Positive error bitmap */
1741
1742         cur_bytenr = cur->start;
1743
1744         /* skip to first inode item or the first inode number change */
1745         nritems = btrfs_header_nritems(cur);
1746         for (i = 0; i < nritems; i++) {
1747                 btrfs_item_key_to_cpu(cur, &key, i);
1748                 if (i == 0)
1749                         first_ino = key.objectid;
1750                 if (key.type == BTRFS_INODE_ITEM_KEY ||
1751                     (first_ino && first_ino != key.objectid))
1752                         break;
1753         }
1754         if (i == nritems) {
1755                 path->slots[0] = nritems;
1756                 return 0;
1757         }
1758         path->slots[0] = i;
1759
1760 again:
1761         err |= check_inode_item(root, path, ext_ref);
1762
1763         /* modify cur since check_inode_item may change path */
1764         cur = path->nodes[0];
1765
1766         if (err & LAST_ITEM)
1767                 goto out;
1768
1769         /* still have inode items in thie leaf */
1770         if (cur->start == cur_bytenr)
1771                 goto again;
1772
1773         /*
1774          * we have switched to another leaf, above nodes may
1775          * have changed, here walk down the path, if a node
1776          * or leaf is shared, check whether we can skip this
1777          * node or leaf.
1778          */
1779         for (i = root_level; i >= 0; i--) {
1780                 if (path->nodes[i]->start == nrefs->bytenr[i])
1781                         continue;
1782
1783                 ret = update_nodes_refs(root, path->nodes[i]->start,
1784                                 path->nodes[i], nrefs, i, 0);
1785                 if (ret)
1786                         goto out;
1787
1788                 if (!nrefs->need_check[i]) {
1789                         *level += 1;
1790                         break;
1791                 }
1792         }
1793
1794         for (i = 0; i < *level; i++) {
1795                 free_extent_buffer(path->nodes[i]);
1796                 path->nodes[i] = NULL;
1797         }
1798 out:
1799         err &= ~LAST_ITEM;
1800         if (err && !ret)
1801                 ret = err;
1802         return ret;
1803 }
1804
1805 static void reada_walk_down(struct btrfs_root *root,
1806                             struct extent_buffer *node, int slot)
1807 {
1808         struct btrfs_fs_info *fs_info = root->fs_info;
1809         u64 bytenr;
1810         u64 ptr_gen;
1811         u32 nritems;
1812         int i;
1813         int level;
1814
1815         level = btrfs_header_level(node);
1816         if (level != 1)
1817                 return;
1818
1819         nritems = btrfs_header_nritems(node);
1820         for (i = slot; i < nritems; i++) {
1821                 bytenr = btrfs_node_blockptr(node, i);
1822                 ptr_gen = btrfs_node_ptr_generation(node, i);
1823                 readahead_tree_block(fs_info, bytenr, ptr_gen);
1824         }
1825 }
1826
1827 /*
1828  * Check the child node/leaf by the following condition:
1829  * 1. the first item key of the node/leaf should be the same with the one
1830  *    in parent.
1831  * 2. block in parent node should match the child node/leaf.
1832  * 3. generation of parent node and child's header should be consistent.
1833  *
1834  * Or the child node/leaf pointed by the key in parent is not valid.
1835  *
1836  * We hope to check leaf owner too, but since subvol may share leaves,
1837  * which makes leaf owner check not so strong, key check should be
1838  * sufficient enough for that case.
1839  */
1840 static int check_child_node(struct extent_buffer *parent, int slot,
1841                             struct extent_buffer *child)
1842 {
1843         struct btrfs_key parent_key;
1844         struct btrfs_key child_key;
1845         int ret = 0;
1846
1847         btrfs_node_key_to_cpu(parent, &parent_key, slot);
1848         if (btrfs_header_level(child) == 0)
1849                 btrfs_item_key_to_cpu(child, &child_key, 0);
1850         else
1851                 btrfs_node_key_to_cpu(child, &child_key, 0);
1852
1853         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
1854                 ret = -EINVAL;
1855                 fprintf(stderr,
1856                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
1857                         parent_key.objectid, parent_key.type, parent_key.offset,
1858                         child_key.objectid, child_key.type, child_key.offset);
1859         }
1860         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
1861                 ret = -EINVAL;
1862                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
1863                         btrfs_node_blockptr(parent, slot),
1864                         btrfs_header_bytenr(child));
1865         }
1866         if (btrfs_node_ptr_generation(parent, slot) !=
1867             btrfs_header_generation(child)) {
1868                 ret = -EINVAL;
1869                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
1870                         btrfs_header_generation(child),
1871                         btrfs_node_ptr_generation(parent, slot));
1872         }
1873         return ret;
1874 }
1875
1876 /*
1877  * for a tree node or leaf, if it's shared, indeed we don't need to iterate it
1878  * in every fs or file tree check. Here we find its all root ids, and only check
1879  * it in the fs or file tree which has the smallest root id.
1880  */
1881 static int need_check(struct btrfs_root *root, struct ulist *roots)
1882 {
1883         struct rb_node *node;
1884         struct ulist_node *u;
1885
1886         /*
1887          * @roots can be empty if it belongs to tree reloc tree
1888          * In that case, we should always check the leaf, as we can't use
1889          * the tree owner to ensure some other root will check it.
1890          */
1891         if (roots->nnodes == 1 || roots->nnodes == 0)
1892                 return 1;
1893
1894         node = rb_first(&roots->root);
1895         u = rb_entry(node, struct ulist_node, rb_node);
1896         /*
1897          * current root id is not smallest, we skip it and let it be checked
1898          * in the fs or file tree who hash the smallest root id.
1899          */
1900         if (root->objectid != u->val)
1901                 return 0;
1902
1903         return 1;
1904 }
1905
1906 static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
1907                                u64 *flags_ret)
1908 {
1909         struct btrfs_root *extent_root = root->fs_info->extent_root;
1910         struct btrfs_root_item *ri = &root->root_item;
1911         struct btrfs_extent_inline_ref *iref;
1912         struct btrfs_extent_item *ei;
1913         struct btrfs_key key;
1914         struct btrfs_path *path = NULL;
1915         unsigned long ptr;
1916         unsigned long end;
1917         u64 flags;
1918         u64 owner = 0;
1919         u64 offset;
1920         int slot;
1921         int type;
1922         int ret = 0;
1923
1924         /*
1925          * Except file/reloc tree, we can not have FULL BACKREF MODE
1926          */
1927         if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
1928                 goto normal;
1929
1930         /* root node */
1931         if (eb->start == btrfs_root_bytenr(ri))
1932                 goto normal;
1933
1934         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
1935                 goto full_backref;
1936
1937         owner = btrfs_header_owner(eb);
1938         if (owner == root->objectid)
1939                 goto normal;
1940
1941         path = btrfs_alloc_path();
1942         if (!path)
1943                 return -ENOMEM;
1944
1945         key.objectid = btrfs_header_bytenr(eb);
1946         key.type = (u8)-1;
1947         key.offset = (u64)-1;
1948
1949         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1950         if (ret <= 0) {
1951                 ret = -EIO;
1952                 goto out;
1953         }
1954
1955         if (ret > 0) {
1956                 ret = btrfs_previous_extent_item(extent_root, path,
1957                                                  key.objectid);
1958                 if (ret)
1959                         goto full_backref;
1960
1961         }
1962         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1963
1964         eb = path->nodes[0];
1965         slot = path->slots[0];
1966         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
1967
1968         flags = btrfs_extent_flags(eb, ei);
1969         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
1970                 goto full_backref;
1971
1972         ptr = (unsigned long)(ei + 1);
1973         end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
1974
1975         if (key.type == BTRFS_EXTENT_ITEM_KEY)
1976                 ptr += sizeof(struct btrfs_tree_block_info);
1977
1978 next:
1979         /* Reached extent item ends normally */
1980         if (ptr == end)
1981                 goto full_backref;
1982
1983         /* Beyond extent item end, wrong item size */
1984         if (ptr > end) {
1985                 error("extent item at bytenr %llu slot %d has wrong size",
1986                         eb->start, slot);
1987                 goto full_backref;
1988         }
1989
1990         iref = (struct btrfs_extent_inline_ref *)ptr;
1991         offset = btrfs_extent_inline_ref_offset(eb, iref);
1992         type = btrfs_extent_inline_ref_type(eb, iref);
1993
1994         if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
1995                 goto normal;
1996         ptr += btrfs_extent_inline_ref_size(type);
1997         goto next;
1998
1999 normal:
2000         *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
2001         goto out;
2002
2003 full_backref:
2004         *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2005 out:
2006         btrfs_free_path(path);
2007         return ret;
2008 }
2009
2010 /*
2011  * for a tree node or leaf, we record its reference count, so later if we still
2012  * process this node or leaf, don't need to compute its reference count again.
2013  *
2014  * @bytenr  if @bytenr == (u64)-1, only update nrefs->full_backref[level]
2015  */
2016 static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
2017                              struct extent_buffer *eb, struct node_refs *nrefs,
2018                              u64 level, int check_all)
2019 {
2020         struct ulist *roots;
2021         u64 refs = 0;
2022         u64 flags = 0;
2023         int root_level = btrfs_header_level(root->node);
2024         int check;
2025         int ret;
2026
2027         if (nrefs->bytenr[level] == bytenr)
2028                 return 0;
2029
2030         if (bytenr != (u64)-1) {
2031                 /* the return value of this function seems a mistake */
2032                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2033                                        level, 1, &refs, &flags);
2034                 /* temporary fix */
2035                 if (ret < 0 && !check_all)
2036                         return ret;
2037
2038                 nrefs->bytenr[level] = bytenr;
2039                 nrefs->refs[level] = refs;
2040                 nrefs->full_backref[level] = 0;
2041                 nrefs->checked[level] = 0;
2042
2043                 if (refs > 1) {
2044                         ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
2045                                                    0, &roots);
2046                         if (ret)
2047                                 return -EIO;
2048
2049                         check = need_check(root, roots);
2050                         ulist_free(roots);
2051                         nrefs->need_check[level] = check;
2052                 } else {
2053                         if (!check_all) {
2054                                 nrefs->need_check[level] = 1;
2055                         } else {
2056                                 if (level == root_level) {
2057                                         nrefs->need_check[level] = 1;
2058                                 } else {
2059                                         /*
2060                                          * The node refs may have not been
2061                                          * updated if upper needs checking (the
2062                                          * lowest root_objectid) the node can
2063                                          * be checked.
2064                                          */
2065                                         nrefs->need_check[level] =
2066                                                 nrefs->need_check[level + 1];
2067                                 }
2068                         }
2069                 }
2070         }
2071
2072         if (check_all && eb) {
2073                 calc_extent_flag_v2(root, eb, &flags);
2074                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2075                         nrefs->full_backref[level] = 1;
2076         }
2077
2078         return 0;
2079 }
2080
2081 /*
2082  * @level           if @level == -1 means extent data item
2083  *                  else normal treeblocl.
2084  */
2085 static int should_check_extent_strictly(struct btrfs_root *root,
2086                                         struct node_refs *nrefs, int level)
2087 {
2088         int root_level = btrfs_header_level(root->node);
2089
2090         if (level > root_level || level < -1)
2091                 return 1;
2092         if (level == root_level)
2093                 return 1;
2094         /*
2095          * if the upper node is marked full backref, it should contain shared
2096          * backref of the parent (except owner == root->objectid).
2097          */
2098         while (++level <= root_level)
2099                 if (nrefs->refs[level] > 1)
2100                         return 0;
2101
2102         return 1;
2103 }
2104
2105 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
2106                           struct walk_control *wc, int *level,
2107                           struct node_refs *nrefs)
2108 {
2109         enum btrfs_tree_block_status status;
2110         u64 bytenr;
2111         u64 ptr_gen;
2112         struct btrfs_fs_info *fs_info = root->fs_info;
2113         struct extent_buffer *next;
2114         struct extent_buffer *cur;
2115         int ret, err = 0;
2116         u64 refs;
2117
2118         WARN_ON(*level < 0);
2119         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2120
2121         if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
2122                 refs = nrefs->refs[*level];
2123                 ret = 0;
2124         } else {
2125                 ret = btrfs_lookup_extent_info(NULL, root,
2126                                        path->nodes[*level]->start,
2127                                        *level, 1, &refs, NULL);
2128                 if (ret < 0) {
2129                         err = ret;
2130                         goto out;
2131                 }
2132                 nrefs->bytenr[*level] = path->nodes[*level]->start;
2133                 nrefs->refs[*level] = refs;
2134         }
2135
2136         if (refs > 1) {
2137                 ret = enter_shared_node(root, path->nodes[*level]->start,
2138                                         refs, wc, *level);
2139                 if (ret > 0) {
2140                         err = ret;
2141                         goto out;
2142                 }
2143         }
2144
2145         while (*level >= 0) {
2146                 WARN_ON(*level < 0);
2147                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2148                 cur = path->nodes[*level];
2149
2150                 if (btrfs_header_level(cur) != *level)
2151                         WARN_ON(1);
2152
2153                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2154                         break;
2155                 if (*level == 0) {
2156                         ret = process_one_leaf(root, cur, wc);
2157                         if (ret < 0)
2158                                 err = ret;
2159                         break;
2160                 }
2161                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2162                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2163
2164                 if (bytenr == nrefs->bytenr[*level - 1]) {
2165                         refs = nrefs->refs[*level - 1];
2166                 } else {
2167                         ret = btrfs_lookup_extent_info(NULL, root, bytenr,
2168                                         *level - 1, 1, &refs, NULL);
2169                         if (ret < 0) {
2170                                 refs = 0;
2171                         } else {
2172                                 nrefs->bytenr[*level - 1] = bytenr;
2173                                 nrefs->refs[*level - 1] = refs;
2174                         }
2175                 }
2176
2177                 if (refs > 1) {
2178                         ret = enter_shared_node(root, bytenr, refs,
2179                                                 wc, *level - 1);
2180                         if (ret > 0) {
2181                                 path->slots[*level]++;
2182                                 continue;
2183                         }
2184                 }
2185
2186                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2187                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2188                         free_extent_buffer(next);
2189                         reada_walk_down(root, cur, path->slots[*level]);
2190                         next = read_tree_block(root->fs_info, bytenr, ptr_gen);
2191                         if (!extent_buffer_uptodate(next)) {
2192                                 struct btrfs_key node_key;
2193
2194                                 btrfs_node_key_to_cpu(path->nodes[*level],
2195                                                       &node_key,
2196                                                       path->slots[*level]);
2197                                 btrfs_add_corrupt_extent_record(root->fs_info,
2198                                                 &node_key,
2199                                                 path->nodes[*level]->start,
2200                                                 root->fs_info->nodesize,
2201                                                 *level);
2202                                 err = -EIO;
2203                                 goto out;
2204                         }
2205                 }
2206
2207                 ret = check_child_node(cur, path->slots[*level], next);
2208                 if (ret) {
2209                         free_extent_buffer(next);
2210                         err = ret;
2211                         goto out;
2212                 }
2213
2214                 if (btrfs_is_leaf(next))
2215                         status = btrfs_check_leaf(root, NULL, next);
2216                 else
2217                         status = btrfs_check_node(root, NULL, next);
2218                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2219                         free_extent_buffer(next);
2220                         err = -EIO;
2221                         goto out;
2222                 }
2223
2224                 *level = *level - 1;
2225                 free_extent_buffer(path->nodes[*level]);
2226                 path->nodes[*level] = next;
2227                 path->slots[*level] = 0;
2228         }
2229 out:
2230         path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
2231         return err;
2232 }
2233
2234 static int fs_root_objectid(u64 objectid);
2235
2236 /*
2237  * Update global fs information.
2238  */
2239 static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
2240                          int level)
2241 {
2242         u32 free_nrs;
2243         struct extent_buffer *eb = path->nodes[level];
2244
2245         total_btree_bytes += eb->len;
2246         if (fs_root_objectid(root->objectid))
2247                 total_fs_tree_bytes += eb->len;
2248         if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
2249                 total_extent_tree_bytes += eb->len;
2250
2251         if (level == 0) {
2252                 btree_space_waste += btrfs_leaf_free_space(root, eb);
2253         } else {
2254                 free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
2255                             btrfs_header_nritems(eb));
2256                 btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
2257         }
2258 }
2259
2260 /*
2261  * This function only handles BACKREF_MISSING,
2262  * If corresponding extent item exists, increase the ref, else insert an extent
2263  * item and backref.
2264  *
2265  * Returns error bits after repair.
2266  */
2267 static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
2268                                  struct btrfs_root *root,
2269                                  struct extent_buffer *node,
2270                                  struct node_refs *nrefs, int level, int err)
2271 {
2272         struct btrfs_fs_info *fs_info = root->fs_info;
2273         struct btrfs_root *extent_root = fs_info->extent_root;
2274         struct btrfs_path path;
2275         struct btrfs_extent_item *ei;
2276         struct btrfs_tree_block_info *bi;
2277         struct btrfs_key key;
2278         struct extent_buffer *eb;
2279         u32 size = sizeof(*ei);
2280         u32 node_size = root->fs_info->nodesize;
2281         int insert_extent = 0;
2282         int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2283         int root_level = btrfs_header_level(root->node);
2284         int generation;
2285         int ret;
2286         u64 owner;
2287         u64 bytenr;
2288         u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
2289         u64 parent = 0;
2290
2291         if ((err & BACKREF_MISSING) == 0)
2292                 return err;
2293
2294         WARN_ON(level > BTRFS_MAX_LEVEL);
2295         WARN_ON(level < 0);
2296
2297         btrfs_init_path(&path);
2298         bytenr = btrfs_header_bytenr(node);
2299         owner = btrfs_header_owner(node);
2300         generation = btrfs_header_generation(node);
2301
2302         key.objectid = bytenr;
2303         key.type = (u8)-1;
2304         key.offset = (u64)-1;
2305
2306         /* Search for the extent item */
2307         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
2308         if (ret <= 0) {
2309                 ret = -EIO;
2310                 goto out;
2311         }
2312
2313         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
2314         if (ret)
2315                 insert_extent = 1;
2316
2317         /* calculate if the extent item flag is full backref or not */
2318         if (nrefs->full_backref[level] != 0)
2319                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2320
2321         /* insert an extent item */
2322         if (insert_extent) {
2323                 struct btrfs_disk_key copy_key;
2324
2325                 generation = btrfs_header_generation(node);
2326
2327                 if (level < root_level && nrefs->full_backref[level + 1] &&
2328                     owner != root->objectid) {
2329                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
2330                 }
2331
2332                 key.objectid = bytenr;
2333                 if (!skinny_metadata) {
2334                         key.type = BTRFS_EXTENT_ITEM_KEY;
2335                         key.offset = node_size;
2336                         size += sizeof(*bi);
2337                 } else {
2338                         key.type = BTRFS_METADATA_ITEM_KEY;
2339                         key.offset = level;
2340                 }
2341
2342                 btrfs_release_path(&path);
2343                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
2344                                               size);
2345                 if (ret)
2346                         goto out;
2347
2348                 eb = path.nodes[0];
2349                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
2350
2351                 btrfs_set_extent_refs(eb, ei, 0);
2352                 btrfs_set_extent_generation(eb, ei, generation);
2353                 btrfs_set_extent_flags(eb, ei, flags);
2354
2355                 if (!skinny_metadata) {
2356                         bi = (struct btrfs_tree_block_info *)(ei + 1);
2357                         memset_extent_buffer(eb, 0, (unsigned long)bi,
2358                                              sizeof(*bi));
2359                         btrfs_set_disk_key_objectid(&copy_key, root->objectid);
2360                         btrfs_set_disk_key_type(&copy_key, 0);
2361                         btrfs_set_disk_key_offset(&copy_key, 0);
2362
2363                         btrfs_set_tree_block_level(eb, bi, level);
2364                         btrfs_set_tree_block_key(eb, bi, &copy_key);
2365                 }
2366                 btrfs_mark_buffer_dirty(eb);
2367                 printf("Added an extent item [%llu %u]\n", bytenr, node_size);
2368                 btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
2369
2370                 nrefs->refs[level] = 0;
2371                 nrefs->full_backref[level] =
2372                         flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
2373                 btrfs_release_path(&path);
2374         }
2375
2376         if (level < root_level && nrefs->full_backref[level + 1] &&
2377             owner != root->objectid)
2378                 parent = nrefs->bytenr[level + 1];
2379
2380         /* increase the ref */
2381         ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
2382                         parent, root->objectid, level, 0);
2383
2384         nrefs->refs[level]++;
2385 out:
2386         btrfs_release_path(&path);
2387         if (ret) {
2388                 error(
2389         "failed to repair tree block ref start %llu root %llu due to %s",
2390                       bytenr, root->objectid, strerror(-ret));
2391         } else {
2392                 printf("Added one tree block ref start %llu %s %llu\n",
2393                        bytenr, parent ? "parent" : "root",
2394                        parent ? parent : root->objectid);
2395                 err &= ~BACKREF_MISSING;
2396         }
2397
2398         return err;
2399 }
2400
2401 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
2402                             unsigned int ext_ref);
2403 static int check_tree_block_ref(struct btrfs_root *root,
2404                                 struct extent_buffer *eb, u64 bytenr,
2405                                 int level, u64 owner, struct node_refs *nrefs);
2406 static int check_leaf_items(struct btrfs_trans_handle *trans,
2407                             struct btrfs_root *root, struct btrfs_path *path,
2408                             struct node_refs *nrefs, int account_bytes);
2409
2410 /*
2411  * @trans      just for lowmem repair mode
2412  * @check all  if not 0 then check all tree block backrefs and items
2413  *             0 then just check relationship of items in fs tree(s)
2414  *
2415  * Returns >0  Found error, should continue
2416  * Returns <0  Fatal error, must exit the whole check
2417  * Returns 0   No errors found
2418  */
2419 static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
2420                              struct btrfs_root *root, struct btrfs_path *path,
2421                              int *level, struct node_refs *nrefs, int ext_ref,
2422                              int check_all)
2423
2424 {
2425         enum btrfs_tree_block_status status;
2426         u64 bytenr;
2427         u64 ptr_gen;
2428         struct btrfs_fs_info *fs_info = root->fs_info;
2429         struct extent_buffer *next;
2430         struct extent_buffer *cur;
2431         int ret;
2432         int err = 0;
2433         int check;
2434         int account_file_data = 0;
2435
2436         WARN_ON(*level < 0);
2437         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2438
2439         ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
2440                                 path->nodes[*level], nrefs, *level, check_all);
2441         if (ret < 0)
2442                 return ret;
2443
2444         while (*level >= 0) {
2445                 WARN_ON(*level < 0);
2446                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2447                 cur = path->nodes[*level];
2448                 bytenr = btrfs_header_bytenr(cur);
2449                 check = nrefs->need_check[*level];
2450
2451                 if (btrfs_header_level(cur) != *level)
2452                         WARN_ON(1);
2453                /*
2454                 * Update bytes accounting and check tree block ref
2455                 * NOTE: Doing accounting and check before checking nritems
2456                 * is necessary because of empty node/leaf.
2457                 */
2458                 if ((check_all && !nrefs->checked[*level]) ||
2459                     (!check_all && nrefs->need_check[*level])) {
2460                         ret = check_tree_block_ref(root, cur,
2461                            btrfs_header_bytenr(cur), btrfs_header_level(cur),
2462                            btrfs_header_owner(cur), nrefs);
2463
2464                         if (repair && ret)
2465                                 ret = repair_tree_block_ref(trans, root,
2466                                     path->nodes[*level], nrefs, *level, ret);
2467                         err |= ret;
2468
2469                         if (check_all && nrefs->need_check[*level] &&
2470                                 nrefs->refs[*level]) {
2471                                 account_bytes(root, path, *level);
2472                                 account_file_data = 1;
2473                         }
2474                         nrefs->checked[*level] = 1;
2475                 }
2476
2477                 if (path->slots[*level] >= btrfs_header_nritems(cur))
2478                         break;
2479
2480                 /* Don't forgot to check leaf/node validation */
2481                 if (*level == 0) {
2482                         /* skip duplicate check */
2483                         if (check || !check_all) {
2484                                 ret = btrfs_check_leaf(root, NULL, cur);
2485                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2486                                         err |= -EIO;
2487                                         break;
2488                                 }
2489                         }
2490
2491                         ret = 0;
2492                         if (!check_all)
2493                                 ret = process_one_leaf_v2(root, path, nrefs,
2494                                                           level, ext_ref);
2495                         else
2496                                 ret = check_leaf_items(trans, root, path,
2497                                                nrefs, account_file_data);
2498                         err |= ret;
2499                         break;
2500                 } else {
2501                         if (check || !check_all) {
2502                                 ret = btrfs_check_node(root, NULL, cur);
2503                                 if (ret != BTRFS_TREE_BLOCK_CLEAN) {
2504                                         err |= -EIO;
2505                                         break;
2506                                 }
2507                         }
2508                 }
2509
2510                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2511                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2512
2513                 ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
2514                                         check_all);
2515                 if (ret < 0)
2516                         break;
2517                 /*
2518                  * check all trees in check_chunks_and_extent_v2
2519                  * check shared node once in check_fs_roots
2520                  */
2521                 if (!check_all && !nrefs->need_check[*level - 1]) {
2522                         path->slots[*level]++;
2523                         continue;
2524                 }
2525
2526                 next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
2527                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2528                         free_extent_buffer(next);
2529                         reada_walk_down(root, cur, path->slots[*level]);
2530                         next = read_tree_block(fs_info, bytenr, ptr_gen);
2531                         if (!extent_buffer_uptodate(next)) {
2532                                 struct btrfs_key node_key;
2533
2534                                 btrfs_node_key_to_cpu(path->nodes[*level],
2535                                                       &node_key,
2536                                                       path->slots[*level]);
2537                                 btrfs_add_corrupt_extent_record(fs_info,
2538                                         &node_key, path->nodes[*level]->start,
2539                                         fs_info->nodesize, *level);
2540                                 err |= -EIO;
2541                                 break;
2542                         }
2543                 }
2544
2545                 ret = check_child_node(cur, path->slots[*level], next);
2546                 err |= ret;
2547                 if (ret < 0) 
2548                         break;
2549
2550                 if (btrfs_is_leaf(next))
2551                         status = btrfs_check_leaf(root, NULL, next);
2552                 else
2553                         status = btrfs_check_node(root, NULL, next);
2554                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
2555                         free_extent_buffer(next);
2556                         err |= -EIO;
2557                         break;
2558                 }
2559
2560                 *level = *level - 1;
2561                 free_extent_buffer(path->nodes[*level]);
2562                 path->nodes[*level] = next;
2563                 path->slots[*level] = 0;
2564                 account_file_data = 0;
2565
2566                 update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
2567         }
2568         return err;
2569 }
2570
2571 static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
2572                         struct walk_control *wc, int *level)
2573 {
2574         int i;
2575         struct extent_buffer *leaf;
2576
2577         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2578                 leaf = path->nodes[i];
2579                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2580                         path->slots[i]++;
2581                         *level = i;
2582                         return 0;
2583                 } else {
2584                         free_extent_buffer(path->nodes[*level]);
2585                         path->nodes[*level] = NULL;
2586                         BUG_ON(*level > wc->active_node);
2587                         if (*level == wc->active_node)
2588                                 leave_shared_node(root, wc, *level);
2589                         *level = i + 1;
2590                 }
2591         }
2592         return 1;
2593 }
2594
2595 static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
2596                            int *level)
2597 {
2598         int i;
2599         struct extent_buffer *leaf;
2600
2601         for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2602                 leaf = path->nodes[i];
2603                 if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
2604                         path->slots[i]++;
2605                         *level = i;
2606                         return 0;
2607                 } else {
2608                         free_extent_buffer(path->nodes[*level]);
2609                         path->nodes[*level] = NULL;
2610                         *level = i + 1;
2611                 }
2612         }
2613         return 1;
2614 }
2615
2616 static int check_root_dir(struct inode_record *rec)
2617 {
2618         struct inode_backref *backref;
2619         int ret = -1;
2620
2621         if (!rec->found_inode_item || rec->errors)
2622                 goto out;
2623         if (rec->nlink != 1 || rec->found_link != 0)
2624                 goto out;
2625         if (list_empty(&rec->backrefs))
2626                 goto out;
2627         backref = to_inode_backref(rec->backrefs.next);
2628         if (!backref->found_inode_ref)
2629                 goto out;
2630         if (backref->index != 0 || backref->namelen != 2 ||
2631             memcmp(backref->name, "..", 2))
2632                 goto out;
2633         if (backref->found_dir_index || backref->found_dir_item)
2634                 goto out;
2635         ret = 0;
2636 out:
2637         return ret;
2638 }
2639
2640 static int repair_inode_isize(struct btrfs_trans_handle *trans,
2641                               struct btrfs_root *root, struct btrfs_path *path,
2642                               struct inode_record *rec)
2643 {
2644         struct btrfs_inode_item *ei;
2645         struct btrfs_key key;
2646         int ret;
2647
2648         key.objectid = rec->ino;
2649         key.type = BTRFS_INODE_ITEM_KEY;
2650         key.offset = (u64)-1;
2651
2652         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2653         if (ret < 0)
2654                 goto out;
2655         if (ret) {
2656                 if (!path->slots[0]) {
2657                         ret = -ENOENT;
2658                         goto out;
2659                 }
2660                 path->slots[0]--;
2661                 ret = 0;
2662         }
2663         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2664         if (key.objectid != rec->ino) {
2665                 ret = -ENOENT;
2666                 goto out;
2667         }
2668
2669         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2670                             struct btrfs_inode_item);
2671         btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
2672         btrfs_mark_buffer_dirty(path->nodes[0]);
2673         rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2674         printf("reset isize for dir %Lu root %Lu\n", rec->ino,
2675                root->root_key.objectid);
2676 out:
2677         btrfs_release_path(path);
2678         return ret;
2679 }
2680
2681 static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
2682                                     struct btrfs_root *root,
2683                                     struct btrfs_path *path,
2684                                     struct inode_record *rec)
2685 {
2686         int ret;
2687
2688         ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
2689         btrfs_release_path(path);
2690         if (!ret)
2691                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
2692         return ret;
2693 }
2694
2695 static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
2696                                struct btrfs_root *root,
2697                                struct btrfs_path *path,
2698                                struct inode_record *rec)
2699 {
2700         struct btrfs_inode_item *ei;
2701         struct btrfs_key key;
2702         int ret = 0;
2703
2704         key.objectid = rec->ino;
2705         key.type = BTRFS_INODE_ITEM_KEY;
2706         key.offset = 0;
2707
2708         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2709         if (ret) {
2710                 if (ret > 0)
2711                         ret = -ENOENT;
2712                 goto out;
2713         }
2714
2715         /* Since ret == 0, no need to check anything */
2716         ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2717                             struct btrfs_inode_item);
2718         btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
2719         btrfs_mark_buffer_dirty(path->nodes[0]);
2720         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
2721         printf("reset nbytes for ino %llu root %llu\n",
2722                rec->ino, root->root_key.objectid);
2723 out:
2724         btrfs_release_path(path);
2725         return ret;
2726 }
2727
2728 static int add_missing_dir_index(struct btrfs_root *root,
2729                                  struct cache_tree *inode_cache,
2730                                  struct inode_record *rec,
2731                                  struct inode_backref *backref)
2732 {
2733         struct btrfs_path path;
2734         struct btrfs_trans_handle *trans;
2735         struct btrfs_dir_item *dir_item;
2736         struct extent_buffer *leaf;
2737         struct btrfs_key key;
2738         struct btrfs_disk_key disk_key;
2739         struct inode_record *dir_rec;
2740         unsigned long name_ptr;
2741         u32 data_size = sizeof(*dir_item) + backref->namelen;
2742         int ret;
2743
2744         trans = btrfs_start_transaction(root, 1);
2745         if (IS_ERR(trans))
2746                 return PTR_ERR(trans);
2747
2748         fprintf(stderr, "repairing missing dir index item for inode %llu\n",
2749                 (unsigned long long)rec->ino);
2750
2751         btrfs_init_path(&path);
2752         key.objectid = backref->dir;
2753         key.type = BTRFS_DIR_INDEX_KEY;
2754         key.offset = backref->index;
2755         ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
2756         BUG_ON(ret);
2757
2758         leaf = path.nodes[0];
2759         dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
2760
2761         disk_key.objectid = cpu_to_le64(rec->ino);
2762         disk_key.type = BTRFS_INODE_ITEM_KEY;
2763         disk_key.offset = 0;
2764
2765         btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
2766         btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
2767         btrfs_set_dir_data_len(leaf, dir_item, 0);
2768         btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
2769         name_ptr = (unsigned long)(dir_item + 1);
2770         write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
2771         btrfs_mark_buffer_dirty(leaf);
2772         btrfs_release_path(&path);
2773         btrfs_commit_transaction(trans, root);
2774
2775         backref->found_dir_index = 1;
2776         dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
2777         BUG_ON(IS_ERR(dir_rec));
2778         if (!dir_rec)
2779                 return 0;
2780         dir_rec->found_size += backref->namelen;
2781         if (dir_rec->found_size == dir_rec->isize &&
2782             (dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
2783                 dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
2784         if (dir_rec->found_size != dir_rec->isize)
2785                 dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
2786
2787         return 0;
2788 }
2789
2790 static int delete_dir_index(struct btrfs_root *root,
2791                             struct inode_backref *backref)
2792 {
2793         struct btrfs_trans_handle *trans;
2794         struct btrfs_dir_item *di;
2795         struct btrfs_path path;
2796         int ret = 0;
2797
2798         trans = btrfs_start_transaction(root, 1);
2799         if (IS_ERR(trans))
2800                 return PTR_ERR(trans);
2801
2802         fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
2803                 (unsigned long long)backref->dir,
2804                 BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
2805                 (unsigned long long)root->objectid);
2806
2807         btrfs_init_path(&path);
2808         di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
2809                                     backref->name, backref->namelen,
2810                                     backref->index, -1);
2811         if (IS_ERR(di)) {
2812                 ret = PTR_ERR(di);
2813                 btrfs_release_path(&path);
2814                 btrfs_commit_transaction(trans, root);
2815                 if (ret == -ENOENT)
2816                         return 0;
2817                 return ret;
2818         }
2819
2820         if (!di)
2821                 ret = btrfs_del_item(trans, root, &path);
2822         else
2823                 ret = btrfs_delete_one_dir_name(trans, root, &path, di);
2824         BUG_ON(ret);
2825         btrfs_release_path(&path);
2826         btrfs_commit_transaction(trans, root);
2827         return ret;
2828 }
2829
2830 static int __create_inode_item(struct btrfs_trans_handle *trans,
2831                                struct btrfs_root *root, u64 ino, u64 size,
2832                                u64 nbytes, u64 nlink, u32 mode)
2833 {
2834         struct btrfs_inode_item ii;
2835         time_t now = time(NULL);
2836         int ret;
2837
2838         btrfs_set_stack_inode_size(&ii, size);
2839         btrfs_set_stack_inode_nbytes(&ii, nbytes);
2840         btrfs_set_stack_inode_nlink(&ii, nlink);
2841         btrfs_set_stack_inode_mode(&ii, mode);
2842         btrfs_set_stack_inode_generation(&ii, trans->transid);
2843         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
2844         btrfs_set_stack_timespec_sec(&ii.ctime, now);
2845         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
2846         btrfs_set_stack_timespec_sec(&ii.mtime, now);
2847         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
2848         btrfs_set_stack_timespec_sec(&ii.otime, 0);
2849         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
2850
2851         ret = btrfs_insert_inode(trans, root, ino, &ii);
2852         ASSERT(!ret);
2853
2854         warning("root %llu inode %llu recreating inode item, this may "
2855                 "be incomplete, please check permissions and content after "
2856                 "the fsck completes.\n", (unsigned long long)root->objectid,
2857                 (unsigned long long)ino);
2858
2859         return 0;
2860 }
2861
2862 static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
2863                                     struct btrfs_root *root, u64 ino,
2864                                     u8 filetype)
2865 {
2866         u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
2867
2868         return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
2869 }
2870
2871 static int create_inode_item(struct btrfs_root *root,
2872                              struct inode_record *rec, int root_dir)
2873 {
2874         struct btrfs_trans_handle *trans;
2875         u64 nlink = 0;
2876         u32 mode = 0;
2877         u64 size = 0;
2878         int ret;
2879
2880         trans = btrfs_start_transaction(root, 1);
2881         if (IS_ERR(trans)) {
2882                 ret = PTR_ERR(trans);
2883                 return ret;
2884         }
2885
2886         nlink = root_dir ? 1 : rec->found_link;
2887         if (rec->found_dir_item) {
2888                 if (rec->found_file_extent)
2889                         fprintf(stderr, "root %llu inode %llu has both a dir "
2890                                 "item and extents, unsure if it is a dir or a "
2891                                 "regular file so setting it as a directory\n",
2892                                 (unsigned long long)root->objectid,
2893                                 (unsigned long long)rec->ino);
2894                 mode = S_IFDIR | 0755;
2895                 size = rec->found_size;
2896         } else if (!rec->found_dir_item) {
2897                 size = rec->extent_end;
2898                 mode =  S_IFREG | 0755;
2899         }
2900
2901         ret = __create_inode_item(trans, root, rec->ino, size, rec->nbytes,
2902                                   nlink, mode);
2903         btrfs_commit_transaction(trans, root);
2904         return 0;
2905 }
2906
2907 static int repair_inode_backrefs(struct btrfs_root *root,
2908                                  struct inode_record *rec,
2909                                  struct cache_tree *inode_cache,
2910                                  int delete)
2911 {
2912         struct inode_backref *tmp, *backref;
2913         u64 root_dirid = btrfs_root_dirid(&root->root_item);
2914         int ret = 0;
2915         int repaired = 0;
2916
2917         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
2918                 if (!delete && rec->ino == root_dirid) {
2919                         if (!rec->found_inode_item) {
2920                                 ret = create_inode_item(root, rec, 1);
2921                                 if (ret)
2922                                         break;
2923                                 repaired++;
2924                         }
2925                 }
2926
2927                 /* Index 0 for root dir's are special, don't mess with it */
2928                 if (rec->ino == root_dirid && backref->index == 0)
2929                         continue;
2930
2931                 if (delete &&
2932                     ((backref->found_dir_index && !backref->found_inode_ref) ||
2933                      (backref->found_dir_index && backref->found_inode_ref &&
2934                       (backref->errors & REF_ERR_INDEX_UNMATCH)))) {
2935                         ret = delete_dir_index(root, backref);
2936                         if (ret)
2937                                 break;
2938                         repaired++;
2939                         list_del(&backref->list);
2940                         free(backref);
2941                         continue;
2942                 }
2943
2944                 if (!delete && !backref->found_dir_index &&
2945                     backref->found_dir_item && backref->found_inode_ref) {
2946                         ret = add_missing_dir_index(root, inode_cache, rec,
2947                                                     backref);
2948                         if (ret)
2949                                 break;
2950                         repaired++;
2951                         if (backref->found_dir_item &&
2952                             backref->found_dir_index) {
2953                                 if (!backref->errors &&
2954                                     backref->found_inode_ref) {
2955                                         list_del(&backref->list);
2956                                         free(backref);
2957                                         continue;
2958                                 }
2959                         }
2960                 }
2961
2962                 if (!delete && (!backref->found_dir_index &&
2963                                 !backref->found_dir_item &&
2964                                 backref->found_inode_ref)) {
2965                         struct btrfs_trans_handle *trans;
2966                         struct btrfs_key location;
2967
2968                         ret = check_dir_conflict(root, backref->name,
2969                                                  backref->namelen,
2970                                                  backref->dir,
2971                                                  backref->index);
2972                         if (ret) {
2973                                 /*
2974                                  * let nlink fixing routine to handle it,
2975                                  * which can do it better.
2976                                  */
2977                                 ret = 0;
2978                                 break;
2979                         }
2980                         location.objectid = rec->ino;
2981                         location.type = BTRFS_INODE_ITEM_KEY;
2982                         location.offset = 0;
2983
2984                         trans = btrfs_start_transaction(root, 1);
2985                         if (IS_ERR(trans)) {
2986                                 ret = PTR_ERR(trans);
2987                                 break;
2988                         }
2989                         fprintf(stderr, "adding missing dir index/item pair "
2990                                 "for inode %llu\n",
2991                                 (unsigned long long)rec->ino);
2992                         ret = btrfs_insert_dir_item(trans, root, backref->name,
2993                                                     backref->namelen,
2994                                                     backref->dir, &location,
2995                                                     imode_to_type(rec->imode),
2996                                                     backref->index);
2997                         BUG_ON(ret);
2998                         btrfs_commit_transaction(trans, root);
2999                         repaired++;
3000                 }
3001
3002                 if (!delete && (backref->found_inode_ref &&
3003                                 backref->found_dir_index &&
3004                                 backref->found_dir_item &&
3005                                 !(backref->errors & REF_ERR_INDEX_UNMATCH) &&
3006                                 !rec->found_inode_item)) {
3007                         ret = create_inode_item(root, rec, 0);
3008                         if (ret)
3009                                 break;
3010                         repaired++;
3011                 }
3012
3013         }
3014         return ret ? ret : repaired;
3015 }
3016
3017 /*
3018  * To determine the file type for nlink/inode_item repair
3019  *
3020  * Return 0 if file type is found and BTRFS_FT_* is stored into type.
3021  * Return -ENOENT if file type is not found.
3022  */
3023 static int find_file_type(struct inode_record *rec, u8 *type)
3024 {
3025         struct inode_backref *backref;
3026
3027         /* For inode item recovered case */
3028         if (rec->found_inode_item) {
3029                 *type = imode_to_type(rec->imode);
3030                 return 0;
3031         }
3032
3033         list_for_each_entry(backref, &rec->backrefs, list) {
3034                 if (backref->found_dir_index || backref->found_dir_item) {
3035                         *type = backref->filetype;
3036                         return 0;
3037                 }
3038         }
3039         return -ENOENT;
3040 }
3041
3042 /*
3043  * To determine the file name for nlink repair
3044  *
3045  * Return 0 if file name is found, set name and namelen.
3046  * Return -ENOENT if file name is not found.
3047  */
3048 static int find_file_name(struct inode_record *rec,
3049                           char *name, int *namelen)
3050 {
3051         struct inode_backref *backref;
3052
3053         list_for_each_entry(backref, &rec->backrefs, list) {
3054                 if (backref->found_dir_index || backref->found_dir_item ||
3055                     backref->found_inode_ref) {
3056                         memcpy(name, backref->name, backref->namelen);
3057                         *namelen = backref->namelen;
3058                         return 0;
3059                 }
3060         }
3061         return -ENOENT;
3062 }
3063
3064 /* Reset the nlink of the inode to the correct one */
3065 static int reset_nlink(struct btrfs_trans_handle *trans,
3066                        struct btrfs_root *root,
3067                        struct btrfs_path *path,
3068                        struct inode_record *rec)
3069 {
3070         struct inode_backref *backref;
3071         struct inode_backref *tmp;
3072         struct btrfs_key key;
3073         struct btrfs_inode_item *inode_item;
3074         int ret = 0;
3075
3076         /* We don't believe this either, reset it and iterate backref */
3077         rec->found_link = 0;
3078
3079         /* Remove all backref including the valid ones */
3080         list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
3081                 ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
3082                                    backref->index, backref->name,
3083                                    backref->namelen, 0);
3084                 if (ret < 0)
3085                         goto out;
3086
3087                 /* remove invalid backref, so it won't be added back */
3088                 if (!(backref->found_dir_index &&
3089                       backref->found_dir_item &&
3090                       backref->found_inode_ref)) {
3091                         list_del(&backref->list);
3092                         free(backref);
3093                 } else {
3094                         rec->found_link++;
3095                 }
3096         }
3097
3098         /* Set nlink to 0 */
3099         key.objectid = rec->ino;
3100         key.type = BTRFS_INODE_ITEM_KEY;
3101         key.offset = 0;
3102         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3103         if (ret < 0)
3104                 goto out;
3105         if (ret > 0) {
3106                 ret = -ENOENT;
3107                 goto out;
3108         }
3109         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3110                                     struct btrfs_inode_item);
3111         btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
3112         btrfs_mark_buffer_dirty(path->nodes[0]);
3113         btrfs_release_path(path);
3114
3115         /*
3116          * Add back valid inode_ref/dir_item/dir_index,
3117          * add_link() will handle the nlink inc, so new nlink must be correct
3118          */
3119         list_for_each_entry(backref, &rec->backrefs, list) {
3120                 ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
3121                                      backref->name, backref->namelen,
3122                                      backref->filetype, &backref->index, 1, 0);
3123                 if (ret < 0)
3124                         goto out;
3125         }
3126 out:
3127         btrfs_release_path(path);
3128         return ret;
3129 }
3130
3131 static int get_highest_inode(struct btrfs_trans_handle *trans,
3132                                 struct btrfs_root *root,
3133                                 struct btrfs_path *path,
3134                                 u64 *highest_ino)
3135 {
3136         struct btrfs_key key, found_key;
3137         int ret;
3138
3139         btrfs_init_path(path);
3140         key.objectid = BTRFS_LAST_FREE_OBJECTID;
3141         key.offset = -1;
3142         key.type = BTRFS_INODE_ITEM_KEY;
3143         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3144         if (ret == 1) {
3145                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
3146                                 path->slots[0] - 1);
3147                 *highest_ino = found_key.objectid;
3148                 ret = 0;
3149         }
3150         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
3151                 ret = -EOVERFLOW;
3152         btrfs_release_path(path);
3153         return ret;
3154 }
3155
3156 /*
3157  * Link inode to dir 'lost+found'. Increase @ref_count.
3158  *
3159  * Returns 0 means success.
3160  * Returns <0 means failure.
3161  */
3162 static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
3163                                    struct btrfs_root *root,
3164                                    struct btrfs_path *path,
3165                                    u64 ino, char *namebuf, u32 name_len,
3166                                    u8 filetype, u64 *ref_count)
3167 {
3168         char *dir_name = "lost+found";
3169         u64 lost_found_ino;
3170         int ret;
3171         u32 mode = 0700;
3172
3173         btrfs_release_path(path);
3174         ret = get_highest_inode(trans, root, path, &lost_found_ino);
3175         if (ret < 0)
3176                 goto out;
3177         lost_found_ino++;
3178
3179         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
3180                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
3181                           mode);
3182         if (ret < 0) {
3183                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
3184                 goto out;
3185         }
3186         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
3187                              namebuf, name_len, filetype, NULL, 1, 0);
3188         /*
3189          * Add ".INO" suffix several times to handle case where
3190          * "FILENAME.INO" is already taken by another file.
3191          */
3192         while (ret == -EEXIST) {
3193                 /*
3194                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
3195                  */
3196                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
3197                         ret = -EFBIG;
3198                         goto out;
3199                 }
3200                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
3201                          ".%llu", ino);
3202                 name_len += count_digits(ino) + 1;
3203                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
3204                                      name_len, filetype, NULL, 1, 0);
3205         }
3206         if (ret < 0) {
3207                 error("failed to link the inode %llu to %s dir: %s",
3208                       ino, dir_name, strerror(-ret));
3209                 goto out;
3210         }
3211
3212         ++*ref_count;
3213         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
3214                name_len, namebuf, dir_name);
3215 out:
3216         btrfs_release_path(path);
3217         if (ret)
3218                 error("failed to move file '%.*s' to '%s' dir", name_len,
3219                                 namebuf, dir_name);
3220         return ret;
3221 }
3222
3223 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
3224                                struct btrfs_root *root,
3225                                struct btrfs_path *path,
3226                                struct inode_record *rec)
3227 {
3228         char namebuf[BTRFS_NAME_LEN] = {0};
3229         u8 type = 0;
3230         int namelen = 0;
3231         int name_recovered = 0;
3232         int type_recovered = 0;
3233         int ret = 0;
3234
3235         /*
3236          * Get file name and type first before these invalid inode ref
3237          * are deleted by remove_all_invalid_backref()
3238          */
3239         name_recovered = !find_file_name(rec, namebuf, &namelen);
3240         type_recovered = !find_file_type(rec, &type);
3241
3242         if (!name_recovered) {
3243                 printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
3244                        rec->ino, rec->ino);
3245                 namelen = count_digits(rec->ino);
3246                 sprintf(namebuf, "%llu", rec->ino);
3247                 name_recovered = 1;
3248         }
3249         if (!type_recovered) {
3250                 printf("Can't get file type for inode %llu, using FILE as fallback\n",
3251                        rec->ino);
3252                 type = BTRFS_FT_REG_FILE;
3253                 type_recovered = 1;
3254         }
3255
3256         ret = reset_nlink(trans, root, path, rec);
3257         if (ret < 0) {
3258                 fprintf(stderr,
3259                         "Failed to reset nlink for inode %llu: %s\n",
3260                         rec->ino, strerror(-ret));
3261                 goto out;
3262         }
3263
3264         if (rec->found_link == 0) {
3265                 ret = link_inode_to_lostfound(trans, root, path, rec->ino,
3266                                               namebuf, namelen, type,
3267                                               (u64 *)&rec->found_link);
3268                 if (ret)
3269                         goto out;
3270         }
3271         printf("Fixed the nlink of inode %llu\n", rec->ino);
3272 out:
3273         /*
3274          * Clear the flag anyway, or we will loop forever for the same inode
3275          * as it will not be removed from the bad inode list and the dead loop
3276          * happens.
3277          */
3278         rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
3279         btrfs_release_path(path);
3280         return ret;
3281 }
3282
3283 /*
3284  * Check if there is any normal(reg or prealloc) file extent for given
3285  * ino.
3286  * This is used to determine the file type when neither its dir_index/item or
3287  * inode_item exists.
3288  *
3289  * This will *NOT* report error, if any error happens, just consider it does
3290  * not have any normal file extent.
3291  */
3292 static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
3293 {
3294         struct btrfs_path path;
3295         struct btrfs_key key;
3296         struct btrfs_key found_key;
3297         struct btrfs_file_extent_item *fi;
3298         u8 type;
3299         int ret = 0;
3300
3301         btrfs_init_path(&path);
3302         key.objectid = ino;
3303         key.type = BTRFS_EXTENT_DATA_KEY;
3304         key.offset = 0;
3305
3306         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
3307         if (ret < 0) {
3308                 ret = 0;
3309                 goto out;
3310         }
3311         if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
3312                 ret = btrfs_next_leaf(root, &path);
3313                 if (ret) {
3314                         ret = 0;
3315                         goto out;
3316                 }
3317         }
3318         while (1) {
3319                 btrfs_item_key_to_cpu(path.nodes[0], &found_key,
3320                                       path.slots[0]);
3321                 if (found_key.objectid != ino ||
3322                     found_key.type != BTRFS_EXTENT_DATA_KEY)
3323                         break;
3324                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
3325                                     struct btrfs_file_extent_item);
3326                 type = btrfs_file_extent_type(path.nodes[0], fi);
3327                 if (type != BTRFS_FILE_EXTENT_INLINE) {
3328                         ret = 1;
3329                         goto out;
3330                 }
3331         }
3332 out:
3333         btrfs_release_path(&path);
3334         return ret;
3335 }
3336
3337 static u32 btrfs_type_to_imode(u8 type)
3338 {
3339         static u32 imode_by_btrfs_type[] = {
3340                 [BTRFS_FT_REG_FILE]     = S_IFREG,
3341                 [BTRFS_FT_DIR]          = S_IFDIR,
3342                 [BTRFS_FT_CHRDEV]       = S_IFCHR,
3343                 [BTRFS_FT_BLKDEV]       = S_IFBLK,
3344                 [BTRFS_FT_FIFO]         = S_IFIFO,
3345                 [BTRFS_FT_SOCK]         = S_IFSOCK,
3346                 [BTRFS_FT_SYMLINK]      = S_IFLNK,
3347         };
3348
3349         return imode_by_btrfs_type[(type)];
3350 }
3351
3352 static int repair_inode_no_item(struct btrfs_trans_handle *trans,
3353                                 struct btrfs_root *root,
3354                                 struct btrfs_path *path,
3355                                 struct inode_record *rec)
3356 {
3357         u8 filetype;
3358         u32 mode = 0700;
3359         int type_recovered = 0;
3360         int ret = 0;
3361
3362         printf("Trying to rebuild inode:%llu\n", rec->ino);
3363
3364         type_recovered = !find_file_type(rec, &filetype);
3365
3366         /*
3367          * Try to determine inode type if type not found.
3368          *
3369          * For found regular file extent, it must be FILE.
3370          * For found dir_item/index, it must be DIR.
3371          *
3372          * For undetermined one, use FILE as fallback.
3373          *
3374          * TODO:
3375          * 1. If found backref(inode_index/item is already handled) to it,
3376          *    it must be DIR.
3377          *    Need new inode-inode ref structure to allow search for that.
3378          */
3379         if (!type_recovered) {
3380                 if (rec->found_file_extent &&
3381                     find_normal_file_extent(root, rec->ino)) {
3382                         type_recovered = 1;
3383                         filetype = BTRFS_FT_REG_FILE;
3384                 } else if (rec->found_dir_item) {
3385                         type_recovered = 1;
3386                         filetype = BTRFS_FT_DIR;
3387                 } else if (!list_empty(&rec->orphan_extents)) {
3388                         type_recovered = 1;
3389                         filetype = BTRFS_FT_REG_FILE;
3390                 } else{
3391                         printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
3392                                rec->ino);
3393                         type_recovered = 1;
3394                         filetype = BTRFS_FT_REG_FILE;
3395                 }
3396         }
3397
3398         ret = btrfs_new_inode(trans, root, rec->ino,
3399                               mode | btrfs_type_to_imode(filetype));
3400         if (ret < 0)
3401                 goto out;
3402
3403         /*
3404          * Here inode rebuild is done, we only rebuild the inode item,
3405          * don't repair the nlink(like move to lost+found).
3406          * That is the job of nlink repair.
3407          *
3408          * We just fill the record and return
3409          */
3410         rec->found_dir_item = 1;
3411         rec->imode = mode | btrfs_type_to_imode(filetype);
3412         rec->nlink = 0;
3413         rec->errors &= ~I_ERR_NO_INODE_ITEM;
3414         /* Ensure the inode_nlinks repair function will be called */
3415         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3416 out:
3417         return ret;
3418 }
3419
3420 static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
3421                                       struct btrfs_root *root,
3422                                       struct btrfs_path *path,
3423                                       struct inode_record *rec)
3424 {
3425         struct orphan_data_extent *orphan;
3426         struct orphan_data_extent *tmp;
3427         int ret = 0;
3428
3429         list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
3430                 /*
3431                  * Check for conflicting file extents
3432                  *
3433                  * Here we don't know whether the extents is compressed or not,
3434                  * so we can only assume it not compressed nor data offset,
3435                  * and use its disk_len as extent length.
3436                  */
3437                 ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
3438                                        orphan->offset, orphan->disk_len, 0);
3439                 btrfs_release_path(path);
3440                 if (ret < 0)
3441                         goto out;
3442                 if (!ret) {
3443                         fprintf(stderr,
3444                                 "orphan extent (%llu, %llu) conflicts, delete the orphan\n",
3445                                 orphan->disk_bytenr, orphan->disk_len);
3446                         ret = btrfs_free_extent(trans,
3447                                         root->fs_info->extent_root,
3448                                         orphan->disk_bytenr, orphan->disk_len,
3449                                         0, root->objectid, orphan->objectid,
3450                                         orphan->offset);
3451                         if (ret < 0)
3452                                 goto out;
3453                 }
3454                 ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
3455                                 orphan->offset, orphan->disk_bytenr,
3456                                 orphan->disk_len, orphan->disk_len);
3457                 if (ret < 0)
3458                         goto out;
3459
3460                 /* Update file size info */
3461                 rec->found_size += orphan->disk_len;
3462                 if (rec->found_size == rec->nbytes)
3463                         rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
3464
3465                 /* Update the file extent hole info too */
3466                 ret = del_file_extent_hole(&rec->holes, orphan->offset,
3467                                            orphan->disk_len);
3468                 if (ret < 0)
3469                         goto out;
3470                 if (RB_EMPTY_ROOT(&rec->holes))
3471                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3472
3473                 list_del(&orphan->list);
3474                 free(orphan);
3475         }
3476         rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
3477 out:
3478         return ret;
3479 }
3480
3481 static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
3482                                         struct btrfs_root *root,
3483                                         struct btrfs_path *path,
3484                                         struct inode_record *rec)
3485 {
3486         struct rb_node *node;
3487         struct file_extent_hole *hole;
3488         int found = 0;
3489         int ret = 0;
3490
3491         node = rb_first(&rec->holes);
3492
3493         while (node) {
3494                 found = 1;
3495                 hole = rb_entry(node, struct file_extent_hole, node);
3496                 ret = btrfs_punch_hole(trans, root, rec->ino,
3497                                        hole->start, hole->len);
3498                 if (ret < 0)
3499                         goto out;
3500                 ret = del_file_extent_hole(&rec->holes, hole->start,
3501                                            hole->len);
3502                 if (ret < 0)
3503                         goto out;
3504                 if (RB_EMPTY_ROOT(&rec->holes))
3505                         rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
3506                 node = rb_first(&rec->holes);
3507         }
3508         /* special case for a file losing all its file extent */
3509         if (!found) {
3510                 ret = btrfs_punch_hole(trans, root, rec->ino, 0,
3511                                        round_up(rec->isize,
3512                                                 root->fs_info->sectorsize));
3513                 if (ret < 0)
3514                         goto out;
3515         }
3516         printf("Fixed discount file extents for inode: %llu in root: %llu\n",
3517                rec->ino, root->objectid);
3518 out:
3519         return ret;
3520 }
3521
3522 static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
3523 {
3524         struct btrfs_trans_handle *trans;
3525         struct btrfs_path path;
3526         int ret = 0;
3527
3528         if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
3529                              I_ERR_NO_ORPHAN_ITEM |
3530                              I_ERR_LINK_COUNT_WRONG |
3531                              I_ERR_NO_INODE_ITEM |
3532                              I_ERR_FILE_EXTENT_ORPHAN |
3533                              I_ERR_FILE_EXTENT_DISCOUNT|
3534                              I_ERR_FILE_NBYTES_WRONG)))
3535                 return rec->errors;
3536
3537         /*
3538          * For nlink repair, it may create a dir and add link, so
3539          * 2 for parent(256)'s dir_index and dir_item
3540          * 2 for lost+found dir's inode_item and inode_ref
3541          * 1 for the new inode_ref of the file
3542          * 2 for lost+found dir's dir_index and dir_item for the file
3543          */
3544         trans = btrfs_start_transaction(root, 7);
3545         if (IS_ERR(trans))
3546                 return PTR_ERR(trans);
3547
3548         btrfs_init_path(&path);
3549         if (rec->errors & I_ERR_NO_INODE_ITEM)
3550                 ret = repair_inode_no_item(trans, root, &path, rec);
3551         if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
3552                 ret = repair_inode_orphan_extent(trans, root, &path, rec);
3553         if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
3554                 ret = repair_inode_discount_extent(trans, root, &path, rec);
3555         if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
3556                 ret = repair_inode_isize(trans, root, &path, rec);
3557         if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
3558                 ret = repair_inode_orphan_item(trans, root, &path, rec);
3559         if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
3560                 ret = repair_inode_nlinks(trans, root, &path, rec);
3561         if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
3562                 ret = repair_inode_nbytes(trans, root, &path, rec);
3563         btrfs_commit_transaction(trans, root);
3564         btrfs_release_path(&path);
3565         return ret;
3566 }
3567
3568 static int check_inode_recs(struct btrfs_root *root,
3569                             struct cache_tree *inode_cache)
3570 {
3571         struct cache_extent *cache;
3572         struct ptr_node *node;
3573         struct inode_record *rec;
3574         struct inode_backref *backref;
3575         int stage = 0;
3576         int ret = 0;
3577         int err = 0;
3578         u64 error = 0;
3579         u64 root_dirid = btrfs_root_dirid(&root->root_item);
3580
3581         if (btrfs_root_refs(&root->root_item) == 0) {
3582                 if (!cache_tree_empty(inode_cache))
3583                         fprintf(stderr, "warning line %d\n", __LINE__);
3584                 return 0;
3585         }
3586
3587         /*
3588          * We need to repair backrefs first because we could change some of the
3589          * errors in the inode recs.
3590          *
3591          * We also need to go through and delete invalid backrefs first and then
3592          * add the correct ones second.  We do this because we may get EEXIST
3593          * when adding back the correct index because we hadn't yet deleted the
3594          * invalid index.
3595          *
3596          * For example, if we were missing a dir index then the directories
3597          * isize would be wrong, so if we fixed the isize to what we thought it
3598          * would be and then fixed the backref we'd still have a invalid fs, so
3599          * we need to add back the dir index and then check to see if the isize
3600          * is still wrong.
3601          */
3602         while (stage < 3) {
3603                 stage++;
3604                 if (stage == 3 && !err)
3605                         break;
3606
3607                 cache = search_cache_extent(inode_cache, 0);
3608                 while (repair && cache) {
3609                         node = container_of(cache, struct ptr_node, cache);
3610                         rec = node->data;
3611                         cache = next_cache_extent(cache);
3612
3613                         /* Need to free everything up and rescan */
3614                         if (stage == 3) {
3615                                 remove_cache_extent(inode_cache, &node->cache);
3616                                 free(node);
3617                                 free_inode_rec(rec);
3618                                 continue;
3619                         }
3620
3621                         if (list_empty(&rec->backrefs))
3622                                 continue;
3623
3624                         ret = repair_inode_backrefs(root, rec, inode_cache,
3625                                                     stage == 1);
3626                         if (ret < 0) {
3627                                 err = ret;
3628                                 stage = 2;
3629                                 break;
3630                         } if (ret > 0) {
3631                                 err = -EAGAIN;
3632                         }
3633                 }
3634         }
3635         if (err)
3636                 return err;
3637
3638         rec = get_inode_rec(inode_cache, root_dirid, 0);
3639         BUG_ON(IS_ERR(rec));
3640         if (rec) {
3641                 ret = check_root_dir(rec);
3642                 if (ret) {
3643                         fprintf(stderr, "root %llu root dir %llu error\n",
3644                                 (unsigned long long)root->root_key.objectid,
3645                                 (unsigned long long)root_dirid);
3646                         print_inode_error(root, rec);
3647                         error++;
3648                 }
3649         } else {
3650                 if (repair) {
3651                         struct btrfs_trans_handle *trans;
3652
3653                         trans = btrfs_start_transaction(root, 1);
3654                         if (IS_ERR(trans)) {
3655                                 err = PTR_ERR(trans);
3656                                 return err;
3657                         }
3658
3659                         fprintf(stderr,
3660                                 "root %llu missing its root dir, recreating\n",
3661                                 (unsigned long long)root->objectid);
3662
3663                         ret = btrfs_make_root_dir(trans, root, root_dirid);
3664                         BUG_ON(ret);
3665
3666                         btrfs_commit_transaction(trans, root);
3667                         return -EAGAIN;
3668                 }
3669
3670                 fprintf(stderr, "root %llu root dir %llu not found\n",
3671                         (unsigned long long)root->root_key.objectid,
3672                         (unsigned long long)root_dirid);
3673         }
3674
3675         while (1) {
3676                 cache = search_cache_extent(inode_cache, 0);
3677                 if (!cache)
3678                         break;
3679                 node = container_of(cache, struct ptr_node, cache);
3680                 rec = node->data;
3681                 remove_cache_extent(inode_cache, &node->cache);
3682                 free(node);
3683                 if (rec->ino == root_dirid ||
3684                     rec->ino == BTRFS_ORPHAN_OBJECTID) {
3685                         free_inode_rec(rec);
3686                         continue;
3687                 }
3688
3689                 if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
3690                         ret = check_orphan_item(root, rec->ino);
3691                         if (ret == 0)
3692                                 rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
3693                         if (can_free_inode_rec(rec)) {
3694                                 free_inode_rec(rec);
3695                                 continue;
3696                         }
3697                 }
3698
3699                 if (!rec->found_inode_item)
3700                         rec->errors |= I_ERR_NO_INODE_ITEM;
3701                 if (rec->found_link != rec->nlink)
3702                         rec->errors |= I_ERR_LINK_COUNT_WRONG;
3703                 if (repair) {
3704                         ret = try_repair_inode(root, rec);
3705                         if (ret == 0 && can_free_inode_rec(rec)) {
3706                                 free_inode_rec(rec);
3707                                 continue;
3708                         }
3709                         ret = 0;
3710                 }
3711
3712                 if (!(repair && ret == 0))
3713                         error++;
3714                 print_inode_error(root, rec);
3715                 list_for_each_entry(backref, &rec->backrefs, list) {
3716                         if (!backref->found_dir_item)
3717                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3718                         if (!backref->found_dir_index)
3719                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3720                         if (!backref->found_inode_ref)
3721                                 backref->errors |= REF_ERR_NO_INODE_REF;
3722                         fprintf(stderr, "\tunresolved ref dir %llu index %llu"
3723                                 " namelen %u name %s filetype %d errors %x",
3724                                 (unsigned long long)backref->dir,
3725                                 (unsigned long long)backref->index,
3726                                 backref->namelen, backref->name,
3727                                 backref->filetype, backref->errors);
3728                         print_ref_error(backref->errors);
3729                 }
3730                 free_inode_rec(rec);
3731         }
3732         return (error > 0) ? -1 : 0;
3733 }
3734
3735 static struct root_record *get_root_rec(struct cache_tree *root_cache,
3736                                         u64 objectid)
3737 {
3738         struct cache_extent *cache;
3739         struct root_record *rec = NULL;
3740         int ret;
3741
3742         cache = lookup_cache_extent(root_cache, objectid, 1);
3743         if (cache) {
3744                 rec = container_of(cache, struct root_record, cache);
3745         } else {
3746                 rec = calloc(1, sizeof(*rec));
3747                 if (!rec)
3748                         return ERR_PTR(-ENOMEM);
3749                 rec->objectid = objectid;
3750                 INIT_LIST_HEAD(&rec->backrefs);
3751                 rec->cache.start = objectid;
3752                 rec->cache.size = 1;
3753
3754                 ret = insert_cache_extent(root_cache, &rec->cache);
3755                 if (ret)
3756                         return ERR_PTR(-EEXIST);
3757         }
3758         return rec;
3759 }
3760
3761 static struct root_backref *get_root_backref(struct root_record *rec,
3762                                              u64 ref_root, u64 dir, u64 index,
3763                                              const char *name, int namelen)
3764 {
3765         struct root_backref *backref;
3766
3767         list_for_each_entry(backref, &rec->backrefs, list) {
3768                 if (backref->ref_root != ref_root || backref->dir != dir ||
3769                     backref->namelen != namelen)
3770                         continue;
3771                 if (memcmp(name, backref->name, namelen))
3772                         continue;
3773                 return backref;
3774         }
3775
3776         backref = calloc(1, sizeof(*backref) + namelen + 1);
3777         if (!backref)
3778                 return NULL;
3779         backref->ref_root = ref_root;
3780         backref->dir = dir;
3781         backref->index = index;
3782         backref->namelen = namelen;
3783         memcpy(backref->name, name, namelen);
3784         backref->name[namelen] = '\0';
3785         list_add_tail(&backref->list, &rec->backrefs);
3786         return backref;
3787 }
3788
3789 static void free_root_record(struct cache_extent *cache)
3790 {
3791         struct root_record *rec;
3792         struct root_backref *backref;
3793
3794         rec = container_of(cache, struct root_record, cache);
3795         while (!list_empty(&rec->backrefs)) {
3796                 backref = to_root_backref(rec->backrefs.next);
3797                 list_del(&backref->list);
3798                 free(backref);
3799         }
3800
3801         free(rec);
3802 }
3803
3804 FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
3805
3806 static int add_root_backref(struct cache_tree *root_cache,
3807                             u64 root_id, u64 ref_root, u64 dir, u64 index,
3808                             const char *name, int namelen,
3809                             int item_type, int errors)
3810 {
3811         struct root_record *rec;
3812         struct root_backref *backref;
3813
3814         rec = get_root_rec(root_cache, root_id);
3815         BUG_ON(IS_ERR(rec));
3816         backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
3817         BUG_ON(!backref);
3818
3819         backref->errors |= errors;
3820
3821         if (item_type != BTRFS_DIR_ITEM_KEY) {
3822                 if (backref->found_dir_index || backref->found_back_ref ||
3823                     backref->found_forward_ref) {
3824                         if (backref->index != index)
3825                                 backref->errors |= REF_ERR_INDEX_UNMATCH;
3826                 } else {
3827                         backref->index = index;
3828                 }
3829         }
3830
3831         if (item_type == BTRFS_DIR_ITEM_KEY) {
3832                 if (backref->found_forward_ref)
3833                         rec->found_ref++;
3834                 backref->found_dir_item = 1;
3835         } else if (item_type == BTRFS_DIR_INDEX_KEY) {
3836                 backref->found_dir_index = 1;
3837         } else if (item_type == BTRFS_ROOT_REF_KEY) {
3838                 if (backref->found_forward_ref)
3839                         backref->errors |= REF_ERR_DUP_ROOT_REF;
3840                 else if (backref->found_dir_item)
3841                         rec->found_ref++;
3842                 backref->found_forward_ref = 1;
3843         } else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
3844                 if (backref->found_back_ref)
3845                         backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
3846                 backref->found_back_ref = 1;
3847         } else {
3848                 BUG_ON(1);
3849         }
3850
3851         if (backref->found_forward_ref && backref->found_dir_item)
3852                 backref->reachable = 1;
3853         return 0;
3854 }
3855
3856 static int merge_root_recs(struct btrfs_root *root,
3857                            struct cache_tree *src_cache,
3858                            struct cache_tree *dst_cache)
3859 {
3860         struct cache_extent *cache;
3861         struct ptr_node *node;
3862         struct inode_record *rec;
3863         struct inode_backref *backref;
3864         int ret = 0;
3865
3866         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
3867                 free_inode_recs_tree(src_cache);
3868                 return 0;
3869         }
3870
3871         while (1) {
3872                 cache = search_cache_extent(src_cache, 0);
3873                 if (!cache)
3874                         break;
3875                 node = container_of(cache, struct ptr_node, cache);
3876                 rec = node->data;
3877                 remove_cache_extent(src_cache, &node->cache);
3878                 free(node);
3879
3880                 ret = is_child_root(root, root->objectid, rec->ino);
3881                 if (ret < 0)
3882                         break;
3883                 else if (ret == 0)
3884                         goto skip;
3885
3886                 list_for_each_entry(backref, &rec->backrefs, list) {
3887                         BUG_ON(backref->found_inode_ref);
3888                         if (backref->found_dir_item)
3889                                 add_root_backref(dst_cache, rec->ino,
3890                                         root->root_key.objectid, backref->dir,
3891                                         backref->index, backref->name,
3892                                         backref->namelen, BTRFS_DIR_ITEM_KEY,
3893                                         backref->errors);
3894                         if (backref->found_dir_index)
3895                                 add_root_backref(dst_cache, rec->ino,
3896                                         root->root_key.objectid, backref->dir,
3897                                         backref->index, backref->name,
3898                                         backref->namelen, BTRFS_DIR_INDEX_KEY,
3899                                         backref->errors);
3900                 }
3901 skip:
3902                 free_inode_rec(rec);
3903         }
3904         if (ret < 0)
3905                 return ret;
3906         return 0;
3907 }
3908
3909 static int check_root_refs(struct btrfs_root *root,
3910                            struct cache_tree *root_cache)
3911 {
3912         struct root_record *rec;
3913         struct root_record *ref_root;
3914         struct root_backref *backref;
3915         struct cache_extent *cache;
3916         int loop = 1;
3917         int ret;
3918         int error;
3919         int errors = 0;
3920
3921         rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
3922         BUG_ON(IS_ERR(rec));
3923         rec->found_ref = 1;
3924
3925         /* fixme: this can not detect circular references */
3926         while (loop) {
3927                 loop = 0;
3928                 cache = search_cache_extent(root_cache, 0);
3929                 while (1) {
3930                         if (!cache)
3931                                 break;
3932                         rec = container_of(cache, struct root_record, cache);
3933                         cache = next_cache_extent(cache);
3934
3935                         if (rec->found_ref == 0)
3936                                 continue;
3937
3938                         list_for_each_entry(backref, &rec->backrefs, list) {
3939                                 if (!backref->reachable)
3940                                         continue;
3941
3942                                 ref_root = get_root_rec(root_cache,
3943                                                         backref->ref_root);
3944                                 BUG_ON(IS_ERR(ref_root));
3945                                 if (ref_root->found_ref > 0)
3946                                         continue;
3947
3948                                 backref->reachable = 0;
3949                                 rec->found_ref--;
3950                                 if (rec->found_ref == 0)
3951                                         loop = 1;
3952                         }
3953                 }
3954         }
3955
3956         cache = search_cache_extent(root_cache, 0);
3957         while (1) {
3958                 if (!cache)
3959                         break;
3960                 rec = container_of(cache, struct root_record, cache);
3961                 cache = next_cache_extent(cache);
3962
3963                 if (rec->found_ref == 0 &&
3964                     rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
3965                     rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
3966                         ret = check_orphan_item(root->fs_info->tree_root,
3967                                                 rec->objectid);
3968                         if (ret == 0)
3969                                 continue;
3970
3971                         /*
3972                          * If we don't have a root item then we likely just have
3973                          * a dir item in a snapshot for this root but no actual
3974                          * ref key or anything so it's meaningless.
3975                          */
3976                         if (!rec->found_root_item)
3977                                 continue;
3978                         errors++;
3979                         fprintf(stderr, "fs tree %llu not referenced\n",
3980                                 (unsigned long long)rec->objectid);
3981                 }
3982
3983                 error = 0;
3984                 if (rec->found_ref > 0 && !rec->found_root_item)
3985                         error = 1;
3986                 list_for_each_entry(backref, &rec->backrefs, list) {
3987                         if (!backref->found_dir_item)
3988                                 backref->errors |= REF_ERR_NO_DIR_ITEM;
3989                         if (!backref->found_dir_index)
3990                                 backref->errors |= REF_ERR_NO_DIR_INDEX;
3991                         if (!backref->found_back_ref)
3992                                 backref->errors |= REF_ERR_NO_ROOT_BACKREF;
3993                         if (!backref->found_forward_ref)
3994                                 backref->errors |= REF_ERR_NO_ROOT_REF;
3995                         if (backref->reachable && backref->errors)
3996                                 error = 1;
3997                 }
3998                 if (!error)
3999                         continue;
4000
4001                 errors++;
4002                 fprintf(stderr, "fs tree %llu refs %u %s\n",
4003                         (unsigned long long)rec->objectid, rec->found_ref,
4004                          rec->found_root_item ? "" : "not found");
4005
4006                 list_for_each_entry(backref, &rec->backrefs, list) {
4007                         if (!backref->reachable)
4008                                 continue;
4009                         if (!backref->errors && rec->found_root_item)
4010                                 continue;
4011                         fprintf(stderr, "\tunresolved ref root %llu dir %llu"
4012                                 " index %llu namelen %u name %s errors %x\n",
4013                                 (unsigned long long)backref->ref_root,
4014                                 (unsigned long long)backref->dir,
4015                                 (unsigned long long)backref->index,
4016                                 backref->namelen, backref->name,
4017                                 backref->errors);
4018                         print_ref_error(backref->errors);
4019                 }
4020         }
4021         return errors > 0 ? 1 : 0;
4022 }
4023
4024 static int process_root_ref(struct extent_buffer *eb, int slot,
4025                             struct btrfs_key *key,
4026                             struct cache_tree *root_cache)
4027 {
4028         u64 dirid;
4029         u64 index;
4030         u32 len;
4031         u32 name_len;
4032         struct btrfs_root_ref *ref;
4033         char namebuf[BTRFS_NAME_LEN];
4034         int error;
4035
4036         ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
4037
4038         dirid = btrfs_root_ref_dirid(eb, ref);
4039         index = btrfs_root_ref_sequence(eb, ref);
4040         name_len = btrfs_root_ref_name_len(eb, ref);
4041
4042         if (name_len <= BTRFS_NAME_LEN) {
4043                 len = name_len;
4044                 error = 0;
4045         } else {
4046                 len = BTRFS_NAME_LEN;
4047                 error = REF_ERR_NAME_TOO_LONG;
4048         }
4049         read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
4050
4051         if (key->type == BTRFS_ROOT_REF_KEY) {
4052                 add_root_backref(root_cache, key->offset, key->objectid, dirid,
4053                                  index, namebuf, len, key->type, error);
4054         } else {
4055                 add_root_backref(root_cache, key->objectid, key->offset, dirid,
4056                                  index, namebuf, len, key->type, error);
4057         }
4058         return 0;
4059 }
4060
4061 static void free_corrupt_block(struct cache_extent *cache)
4062 {
4063         struct btrfs_corrupt_block *corrupt;
4064
4065         corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
4066         free(corrupt);
4067 }
4068
4069 FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
4070
4071 /*
4072  * Repair the btree of the given root.
4073  *
4074  * The fix is to remove the node key in corrupt_blocks cache_tree.
4075  * and rebalance the tree.
4076  * After the fix, the btree should be writeable.
4077  */
4078 static int repair_btree(struct btrfs_root *root,
4079                         struct cache_tree *corrupt_blocks)
4080 {
4081         struct btrfs_trans_handle *trans;
4082         struct btrfs_path path;
4083         struct btrfs_corrupt_block *corrupt;
4084         struct cache_extent *cache;
4085         struct btrfs_key key;
4086         u64 offset;
4087         int level;
4088         int ret = 0;
4089
4090         if (cache_tree_empty(corrupt_blocks))
4091                 return 0;
4092
4093         trans = btrfs_start_transaction(root, 1);
4094         if (IS_ERR(trans)) {
4095                 ret = PTR_ERR(trans);
4096                 fprintf(stderr, "Error starting transaction: %s\n",
4097                         strerror(-ret));
4098                 return ret;
4099         }
4100         btrfs_init_path(&path);
4101         cache = first_cache_extent(corrupt_blocks);
4102         while (cache) {
4103                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4104                                        cache);
4105                 level = corrupt->level;
4106                 path.lowest_level = level;
4107                 key.objectid = corrupt->key.objectid;
4108                 key.type = corrupt->key.type;
4109                 key.offset = corrupt->key.offset;
4110
4111                 /*
4112                  * Here we don't want to do any tree balance, since it may
4113                  * cause a balance with corrupted brother leaf/node,
4114                  * so ins_len set to 0 here.
4115                  * Balance will be done after all corrupt node/leaf is deleted.
4116                  */
4117                 ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
4118                 if (ret < 0)
4119                         goto out;
4120                 offset = btrfs_node_blockptr(path.nodes[level],
4121                                              path.slots[level]);
4122
4123                 /* Remove the ptr */
4124                 ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
4125                 if (ret < 0)
4126                         goto out;
4127                 /*
4128                  * Remove the corresponding extent
4129                  * return value is not concerned.
4130                  */
4131                 btrfs_release_path(&path);
4132                 ret = btrfs_free_extent(trans, root, offset,
4133                                 root->fs_info->nodesize, 0,
4134                                 root->root_key.objectid, level - 1, 0);
4135                 cache = next_cache_extent(cache);
4136         }
4137
4138         /* Balance the btree using btrfs_search_slot() */
4139         cache = first_cache_extent(corrupt_blocks);
4140         while (cache) {
4141                 corrupt = container_of(cache, struct btrfs_corrupt_block,
4142                                        cache);
4143                 memcpy(&key, &corrupt->key, sizeof(key));
4144                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
4145                 if (ret < 0)
4146                         goto out;
4147                 /* return will always >0 since it won't find the item */
4148                 ret = 0;
4149                 btrfs_release_path(&path);
4150                 cache = next_cache_extent(cache);
4151         }
4152 out:
4153         btrfs_commit_transaction(trans, root);
4154         btrfs_release_path(&path);
4155         return ret;
4156 }
4157
4158 static int check_fs_root(struct btrfs_root *root,
4159                          struct cache_tree *root_cache,
4160                          struct walk_control *wc)
4161 {
4162         int ret = 0;
4163         int err = 0;
4164         int wret;
4165         int level;
4166         struct btrfs_path path;
4167         struct shared_node root_node;
4168         struct root_record *rec;
4169         struct btrfs_root_item *root_item = &root->root_item;
4170         struct cache_tree corrupt_blocks;
4171         struct orphan_data_extent *orphan;
4172         struct orphan_data_extent *tmp;
4173         enum btrfs_tree_block_status status;
4174         struct node_refs nrefs;
4175
4176         /*
4177          * Reuse the corrupt_block cache tree to record corrupted tree block
4178          *
4179          * Unlike the usage in extent tree check, here we do it in a per
4180          * fs/subvol tree base.
4181          */
4182         cache_tree_init(&corrupt_blocks);
4183         root->fs_info->corrupt_blocks = &corrupt_blocks;
4184
4185         if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
4186                 rec = get_root_rec(root_cache, root->root_key.objectid);
4187                 BUG_ON(IS_ERR(rec));
4188                 if (btrfs_root_refs(root_item) > 0)
4189                         rec->found_root_item = 1;
4190         }
4191
4192         btrfs_init_path(&path);
4193         memset(&root_node, 0, sizeof(root_node));
4194         cache_tree_init(&root_node.root_cache);
4195         cache_tree_init(&root_node.inode_cache);
4196         memset(&nrefs, 0, sizeof(nrefs));
4197
4198         /* Move the orphan extent record to corresponding inode_record */
4199         list_for_each_entry_safe(orphan, tmp,
4200                                  &root->orphan_data_extents, list) {
4201                 struct inode_record *inode;
4202
4203                 inode = get_inode_rec(&root_node.inode_cache, orphan->objectid,
4204                                       1);
4205                 BUG_ON(IS_ERR(inode));
4206                 inode->errors |= I_ERR_FILE_EXTENT_ORPHAN;
4207                 list_move(&orphan->list, &inode->orphan_extents);
4208         }
4209
4210         level = btrfs_header_level(root->node);
4211         memset(wc->nodes, 0, sizeof(wc->nodes));
4212         wc->nodes[level] = &root_node;
4213         wc->active_node = level;
4214         wc->root_level = level;
4215
4216         /* We may not have checked the root block, lets do that now */
4217         if (btrfs_is_leaf(root->node))
4218                 status = btrfs_check_leaf(root, NULL, root->node);
4219         else
4220                 status = btrfs_check_node(root, NULL, root->node);
4221         if (status != BTRFS_TREE_BLOCK_CLEAN)
4222                 return -EIO;
4223
4224         if (btrfs_root_refs(root_item) > 0 ||
4225             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4226                 path.nodes[level] = root->node;
4227                 extent_buffer_get(root->node);
4228                 path.slots[level] = 0;
4229         } else {
4230                 struct btrfs_key key;
4231                 struct btrfs_disk_key found_key;
4232
4233                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4234                 level = root_item->drop_level;
4235                 path.lowest_level = level;
4236                 if (level > btrfs_header_level(root->node) ||
4237                     level >= BTRFS_MAX_LEVEL) {
4238                         error("ignoring invalid drop level: %u", level);
4239                         goto skip_walking;
4240                 }
4241                 wret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4242                 if (wret < 0)
4243                         goto skip_walking;
4244                 btrfs_node_key(path.nodes[level], &found_key,
4245                                 path.slots[level]);
4246                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
4247                                         sizeof(found_key)));
4248         }
4249
4250         while (1) {
4251                 wret = walk_down_tree(root, &path, wc, &level, &nrefs);
4252                 if (wret < 0)
4253                         ret = wret;
4254                 if (wret != 0)
4255                         break;
4256
4257                 wret = walk_up_tree(root, &path, wc, &level);
4258                 if (wret < 0)
4259                         ret = wret;
4260                 if (wret != 0)
4261                         break;
4262         }
4263 skip_walking:
4264         btrfs_release_path(&path);
4265
4266         if (!cache_tree_empty(&corrupt_blocks)) {
4267                 struct cache_extent *cache;
4268                 struct btrfs_corrupt_block *corrupt;
4269
4270                 printf("The following tree block(s) is corrupted in tree %llu:\n",
4271                        root->root_key.objectid);
4272                 cache = first_cache_extent(&corrupt_blocks);
4273                 while (cache) {
4274                         corrupt = container_of(cache,
4275                                                struct btrfs_corrupt_block,
4276                                                cache);
4277                         printf("\ttree block bytenr: %llu, level: %d, node key: (%llu, %u, %llu)\n",
4278                                cache->start, corrupt->level,
4279                                corrupt->key.objectid, corrupt->key.type,
4280                                corrupt->key.offset);
4281                         cache = next_cache_extent(cache);
4282                 }
4283                 if (repair) {
4284                         printf("Try to repair the btree for root %llu\n",
4285                                root->root_key.objectid);
4286                         ret = repair_btree(root, &corrupt_blocks);
4287                         if (ret < 0)
4288                                 fprintf(stderr, "Failed to repair btree: %s\n",
4289                                         strerror(-ret));
4290                         if (!ret)
4291                                 printf("Btree for root %llu is fixed\n",
4292                                        root->root_key.objectid);
4293                 }
4294         }
4295
4296         err = merge_root_recs(root, &root_node.root_cache, root_cache);
4297         if (err < 0)
4298                 ret = err;
4299
4300         if (root_node.current) {
4301                 root_node.current->checked = 1;
4302                 maybe_free_inode_rec(&root_node.inode_cache,
4303                                 root_node.current);
4304         }
4305
4306         err = check_inode_recs(root, &root_node.inode_cache);
4307         if (!ret)
4308                 ret = err;
4309
4310         free_corrupt_blocks_tree(&corrupt_blocks);
4311         root->fs_info->corrupt_blocks = NULL;
4312         free_orphan_data_extents(&root->orphan_data_extents);
4313         return ret;
4314 }
4315
4316 static int fs_root_objectid(u64 objectid)
4317 {
4318         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
4319             objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
4320                 return 1;
4321         return is_fstree(objectid);
4322 }
4323
4324 static int check_fs_roots(struct btrfs_fs_info *fs_info,
4325                           struct cache_tree *root_cache)
4326 {
4327         struct btrfs_path path;
4328         struct btrfs_key key;
4329         struct walk_control wc;
4330         struct extent_buffer *leaf, *tree_node;
4331         struct btrfs_root *tmp_root;
4332         struct btrfs_root *tree_root = fs_info->tree_root;
4333         int ret;
4334         int err = 0;
4335
4336         if (ctx.progress_enabled) {
4337                 ctx.tp = TASK_FS_ROOTS;
4338                 task_start(ctx.info);
4339         }
4340
4341         /*
4342          * Just in case we made any changes to the extent tree that weren't
4343          * reflected into the free space cache yet.
4344          */
4345         if (repair)
4346                 reset_cached_block_groups(fs_info);
4347         memset(&wc, 0, sizeof(wc));
4348         cache_tree_init(&wc.shared);
4349         btrfs_init_path(&path);
4350
4351 again:
4352         key.offset = 0;
4353         key.objectid = 0;
4354         key.type = BTRFS_ROOT_ITEM_KEY;
4355         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
4356         if (ret < 0) {
4357                 err = 1;
4358                 goto out;
4359         }
4360         tree_node = tree_root->node;
4361         while (1) {
4362                 if (tree_node != tree_root->node) {
4363                         free_root_recs_tree(root_cache);
4364                         btrfs_release_path(&path);
4365                         goto again;
4366                 }
4367                 leaf = path.nodes[0];
4368                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
4369                         ret = btrfs_next_leaf(tree_root, &path);
4370                         if (ret) {
4371                                 if (ret < 0)
4372                                         err = 1;
4373                                 break;
4374                         }
4375                         leaf = path.nodes[0];
4376                 }
4377                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
4378                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
4379                     fs_root_objectid(key.objectid)) {
4380                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
4381                                 tmp_root = btrfs_read_fs_root_no_cache(
4382                                                 fs_info, &key);
4383                         } else {
4384                                 key.offset = (u64)-1;
4385                                 tmp_root = btrfs_read_fs_root(
4386                                                 fs_info, &key);
4387                         }
4388                         if (IS_ERR(tmp_root)) {
4389                                 err = 1;
4390                                 goto next;
4391                         }
4392                         ret = check_fs_root(tmp_root, root_cache, &wc);
4393                         if (ret == -EAGAIN) {
4394                                 free_root_recs_tree(root_cache);
4395                                 btrfs_release_path(&path);
4396                                 goto again;
4397                         }
4398                         if (ret)
4399                                 err = 1;
4400                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4401                                 btrfs_free_fs_root(tmp_root);
4402                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
4403                            key.type == BTRFS_ROOT_BACKREF_KEY) {
4404                         process_root_ref(leaf, path.slots[0], &key,
4405                                          root_cache);
4406                 }
4407 next:
4408                 path.slots[0]++;
4409         }
4410 out:
4411         btrfs_release_path(&path);
4412         if (err)
4413                 free_extent_cache_tree(&wc.shared);
4414         if (!cache_tree_empty(&wc.shared))
4415                 fprintf(stderr, "warning line %d\n", __LINE__);
4416
4417         task_stop(ctx.info);
4418
4419         return err;
4420 }
4421
4422 /*
4423  * Find the @index according by @ino and name.
4424  * Notice:time efficiency is O(N)
4425  *
4426  * @root:       the root of the fs/file tree
4427  * @index_ret:  the index as return value
4428  * @namebuf:    the name to match
4429  * @name_len:   the length of name to match
4430  * @file_type:  the file_type of INODE_ITEM to match
4431  *
4432  * Returns 0 if found and *@index_ret will be modified with right value
4433  * Returns< 0 not found and *@index_ret will be (u64)-1
4434  */
4435 static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
4436                           u64 *index_ret, char *namebuf, u32 name_len,
4437                           u8 file_type)
4438 {
4439         struct btrfs_path path;
4440         struct extent_buffer *node;
4441         struct btrfs_dir_item *di;
4442         struct btrfs_key key;
4443         struct btrfs_key location;
4444         char name[BTRFS_NAME_LEN] = {0};
4445
4446         u32 total;
4447         u32 cur = 0;
4448         u32 len;
4449         u32 data_len;
4450         u8 filetype;
4451         int slot;
4452         int ret;
4453
4454         ASSERT(index_ret);
4455
4456         /* search from the last index */
4457         key.objectid = dirid;
4458         key.offset = (u64)-1;
4459         key.type = BTRFS_DIR_INDEX_KEY;
4460
4461         btrfs_init_path(&path);
4462         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
4463         if (ret < 0)
4464                 return ret;
4465
4466 loop:
4467         ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
4468         if (ret) {
4469                 ret = -ENOENT;
4470                 *index_ret = (64)-1;
4471                 goto out;
4472         }
4473         /* Check whether inode_id/filetype/name match */
4474         node = path.nodes[0];
4475         slot = path.slots[0];
4476         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4477         total = btrfs_item_size_nr(node, slot);
4478         while (cur < total) {
4479                 ret = -ENOENT;
4480                 len = btrfs_dir_name_len(node, di);
4481                 data_len = btrfs_dir_data_len(node, di);
4482
4483                 btrfs_dir_item_key_to_cpu(node, di, &location);
4484                 if (location.objectid != location_id ||
4485                     location.type != BTRFS_INODE_ITEM_KEY ||
4486                     location.offset != 0)
4487                         goto next;
4488
4489                 filetype = btrfs_dir_type(node, di);
4490                 if (file_type != filetype)
4491                         goto next;
4492
4493                 if (len > BTRFS_NAME_LEN)
4494                         len = BTRFS_NAME_LEN;
4495
4496                 read_extent_buffer(node, name, (unsigned long)(di + 1), len);
4497                 if (len != name_len || strncmp(namebuf, name, len))
4498                         goto next;
4499
4500                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
4501                 *index_ret = key.offset;
4502                 ret = 0;
4503                 goto out;
4504 next:
4505                 len += sizeof(*di) + data_len;
4506                 di = (struct btrfs_dir_item *)((char *)di + len);
4507                 cur += len;
4508         }
4509         goto loop;
4510
4511 out:
4512         btrfs_release_path(&path);
4513         return ret;
4514 }
4515
4516 /*
4517  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
4518  * INODE_REF/INODE_EXTREF match.
4519  *
4520  * @root:       the root of the fs/file tree
4521  * @key:        the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
4522  *              value while find index
4523  * @location_key: location key of the struct btrfs_dir_item to match
4524  * @name:       the name to match
4525  * @namelen:    the length of name
4526  * @file_type:  the type of file to math
4527  *
4528  * Return 0 if no error occurred.
4529  * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
4530  * DIR_ITEM/DIR_INDEX
4531  * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
4532  * and DIR_ITEM/DIR_INDEX mismatch
4533  */
4534 static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
4535                          struct btrfs_key *location_key, char *name,
4536                          u32 namelen, u8 file_type)
4537 {
4538         struct btrfs_path path;
4539         struct extent_buffer *node;
4540         struct btrfs_dir_item *di;
4541         struct btrfs_key location;
4542         char namebuf[BTRFS_NAME_LEN] = {0};
4543         u32 total;
4544         u32 cur = 0;
4545         u32 len;
4546         u32 data_len;
4547         u8 filetype;
4548         int slot;
4549         int ret;
4550
4551         /* get the index by traversing all index */
4552         if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
4553                 ret = find_dir_index(root, key->objectid,
4554                                      location_key->objectid, &key->offset,
4555                                      name, namelen, file_type);
4556                 if (ret)
4557                         ret = DIR_INDEX_MISSING;
4558                 return ret;
4559         }
4560
4561         btrfs_init_path(&path);
4562         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
4563         if (ret) {
4564                 ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
4565                         DIR_INDEX_MISSING;
4566                 goto out;
4567         }
4568
4569         /* Check whether inode_id/filetype/name match */
4570         node = path.nodes[0];
4571         slot = path.slots[0];
4572         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
4573         total = btrfs_item_size_nr(node, slot);
4574         while (cur < total) {
4575                 ret = key->type == BTRFS_DIR_ITEM_KEY ?
4576                         DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
4577
4578                 len = btrfs_dir_name_len(node, di);
4579                 data_len = btrfs_dir_data_len(node, di);
4580
4581                 btrfs_dir_item_key_to_cpu(node, di, &location);
4582                 if (location.objectid != location_key->objectid ||
4583                     location.type != location_key->type ||
4584                     location.offset != location_key->offset)
4585                         goto next;
4586
4587                 filetype = btrfs_dir_type(node, di);
4588                 if (file_type != filetype)
4589                         goto next;
4590
4591                 if (len > BTRFS_NAME_LEN) {
4592                         len = BTRFS_NAME_LEN;
4593                         warning("root %llu %s[%llu %llu] name too long %u, trimmed",
4594                         root->objectid,
4595                         key->type == BTRFS_DIR_ITEM_KEY ?
4596                         "DIR_ITEM" : "DIR_INDEX",
4597                         key->objectid, key->offset, len);
4598                 }
4599                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
4600                                    len);
4601                 if (len != namelen || strncmp(namebuf, name, len))
4602                         goto next;
4603
4604                 ret = 0;
4605                 goto out;
4606 next:
4607                 len += sizeof(*di) + data_len;
4608                 di = (struct btrfs_dir_item *)((char *)di + len);
4609                 cur += len;
4610         }
4611
4612 out:
4613         btrfs_release_path(&path);
4614         return ret;
4615 }
4616
4617 /*
4618  * Prints inode ref error message
4619  */
4620 static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
4621                                 u64 index, const char *namebuf, int name_len,
4622                                 u8 filetype, int err)
4623 {
4624         if (!err)
4625                 return;
4626
4627         /* root dir error */
4628         if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4629                 error(
4630         "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
4631                       root->objectid, key->objectid, key->offset, namebuf);
4632                 return;
4633         }
4634
4635         /* normal error */
4636         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4637                 error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
4638                       root->objectid, key->offset,
4639                       btrfs_name_hash(namebuf, name_len),
4640                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4641                       namebuf, filetype);
4642         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4643                 error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
4644                       root->objectid, key->offset, index,
4645                       err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
4646                       namebuf, filetype);
4647 }
4648
4649 /*
4650  * Insert the missing inode item.
4651  *
4652  * Returns 0 means success.
4653  * Returns <0 means error.
4654  */
4655 static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
4656                                      u8 filetype)
4657 {
4658         struct btrfs_key key;
4659         struct btrfs_trans_handle *trans;
4660         struct btrfs_path path;
4661         int ret;
4662
4663         key.objectid = ino;
4664         key.type = BTRFS_INODE_ITEM_KEY;
4665         key.offset = 0;
4666
4667         btrfs_init_path(&path);
4668         trans = btrfs_start_transaction(root, 1);
4669         if (IS_ERR(trans)) {
4670                 ret = -EIO;
4671                 goto out;
4672         }
4673
4674         ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
4675         if (ret < 0 || !ret)
4676                 goto fail;
4677
4678         /* insert inode item */
4679         create_inode_item_lowmem(trans, root, ino, filetype);
4680         ret = 0;
4681 fail:
4682         btrfs_commit_transaction(trans, root);
4683 out:
4684         if (ret)
4685                 error("failed to repair root %llu INODE ITEM[%llu] missing",
4686                       root->objectid, ino);
4687         btrfs_release_path(&path);
4688         return ret;
4689 }
4690
4691 /*
4692  * The ternary means dir item, dir index and relative inode ref.
4693  * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
4694  * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
4695  * strategy:
4696  * If two of three is missing or mismatched, delete the existing one.
4697  * If one of three is missing or mismatched, add the missing one.
4698  *
4699  * returns 0 means success.
4700  * returns not 0 means on error;
4701  */
4702 int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
4703                           u64 index, char *name, int name_len, u8 filetype,
4704                           int err)
4705 {
4706         struct btrfs_trans_handle *trans;
4707         int stage = 0;
4708         int ret = 0;
4709
4710         /*
4711          * stage shall be one of following valild values:
4712          *      0: Fine, nothing to do.
4713          *      1: One of three is wrong, so add missing one.
4714          *      2: Two of three is wrong, so delete existed one.
4715          */
4716         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
4717                 stage++;
4718         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
4719                 stage++;
4720         if (err & (INODE_REF_MISSING))
4721                 stage++;
4722
4723         /* stage must be smllarer than 3 */
4724         ASSERT(stage < 3);
4725
4726         trans = btrfs_start_transaction(root, 1);
4727         if (stage == 2) {
4728                 ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
4729                                    name_len, 0);
4730                 goto out;
4731         }
4732         if (stage == 1) {
4733                 ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
4734                                filetype, &index, 1, 1);
4735                 goto out;
4736         }
4737 out:
4738         btrfs_commit_transaction(trans, root);
4739
4740         if (ret)
4741                 error("fail to repair inode %llu name %s filetype %u",
4742                       ino, name, filetype);
4743         else
4744                 printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
4745                        stage == 2 ? "Delete" : "Add",
4746                        ino, name, filetype);
4747
4748         return ret;
4749 }
4750
4751 /*
4752  * Traverse the given INODE_REF and call find_dir_item() to find related
4753  * DIR_ITEM/DIR_INDEX.
4754  *
4755  * @root:       the root of the fs/file tree
4756  * @ref_key:    the key of the INODE_REF
4757  * @path        the path provides node and slot
4758  * @refs:       the count of INODE_REF
4759  * @mode:       the st_mode of INODE_ITEM
4760  * @name_ret:   returns with the first ref's name
4761  * @name_len_ret:    len of the name_ret
4762  *
4763  * Return 0 if no error occurred.
4764  */
4765 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
4766                            struct btrfs_path *path, char *name_ret,
4767                            u32 *namelen_ret, u64 *refs_ret, int mode)
4768 {
4769         struct btrfs_key key;
4770         struct btrfs_key location;
4771         struct btrfs_inode_ref *ref;
4772         struct extent_buffer *node;
4773         char namebuf[BTRFS_NAME_LEN] = {0};
4774         u32 total;
4775         u32 cur = 0;
4776         u32 len;
4777         u32 name_len;
4778         u64 index;
4779         int ret;
4780         int err = 0;
4781         int tmp_err;
4782         int slot;
4783         int need_research = 0;
4784         u64 refs;
4785
4786 begin:
4787         err = 0;
4788         cur = 0;
4789         refs = *refs_ret;
4790
4791         /* since after repair, path and the dir item may be changed */
4792         if (need_research) {
4793                 need_research = 0;
4794                 btrfs_release_path(path);
4795                 ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
4796                 /* the item was deleted, let path point to the last checked item */
4797                 if (ret > 0) {
4798                         if (path->slots[0] == 0)
4799                                 btrfs_prev_leaf(root, path);
4800                         else
4801                                 path->slots[0]--;
4802                 }
4803                 if (ret)
4804                         goto out;
4805         }
4806
4807         location.objectid = ref_key->objectid;
4808         location.type = BTRFS_INODE_ITEM_KEY;
4809         location.offset = 0;
4810         node = path->nodes[0];
4811         slot = path->slots[0];
4812
4813         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
4814         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
4815         total = btrfs_item_size_nr(node, slot);
4816
4817 next:
4818         /* Update inode ref count */
4819         refs++;
4820         tmp_err = 0;
4821         index = btrfs_inode_ref_index(node, ref);
4822         name_len = btrfs_inode_ref_name_len(node, ref);
4823
4824         if (name_len <= BTRFS_NAME_LEN) {
4825                 len = name_len;
4826         } else {
4827                 len = BTRFS_NAME_LEN;
4828                 warning("root %llu INODE_REF[%llu %llu] name too long",
4829                         root->objectid, ref_key->objectid, ref_key->offset);
4830         }
4831
4832         read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
4833
4834         /* copy the first name found to name_ret */
4835         if (refs == 1 && name_ret) {
4836                 memcpy(name_ret, namebuf, len);
4837                 *namelen_ret = len;
4838         }
4839
4840         /* Check root dir ref */
4841         if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
4842                 if (index != 0 || len != strlen("..") ||
4843                     strncmp("..", namebuf, len) ||
4844                     ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
4845                         /* set err bits then repair will delete the ref */
4846                         err |= DIR_INDEX_MISSING;
4847                         err |= DIR_ITEM_MISSING;
4848                 }
4849                 goto end;
4850         }
4851
4852         /* Find related DIR_INDEX */
4853         key.objectid = ref_key->offset;
4854         key.type = BTRFS_DIR_INDEX_KEY;
4855         key.offset = index;
4856         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4857                             imode_to_type(mode));
4858
4859         /* Find related dir_item */
4860         key.objectid = ref_key->offset;
4861         key.type = BTRFS_DIR_ITEM_KEY;
4862         key.offset = btrfs_name_hash(namebuf, len);
4863         tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
4864                             imode_to_type(mode));
4865 end:
4866         if (tmp_err && repair) {
4867                 ret = repair_ternary_lowmem(root, ref_key->offset,
4868                                             ref_key->objectid, index, namebuf,
4869                                             name_len, imode_to_type(mode),
4870                                             tmp_err);
4871                 if (!ret) {
4872                         need_research = 1;
4873                         goto begin;
4874                 }
4875         }
4876         print_inode_ref_err(root, ref_key, index, namebuf, name_len,
4877                             imode_to_type(mode), tmp_err);
4878         err |= tmp_err;
4879         len = sizeof(*ref) + name_len;
4880         ref = (struct btrfs_inode_ref *)((char *)ref + len);
4881         cur += len;
4882         if (cur < total)
4883                 goto next;
4884
4885 out:
4886         *refs_ret = refs;
4887         return err;
4888 }
4889
4890 /*
4891  * Traverse the given INODE_EXTREF and call find_dir_item() to find related
4892  * DIR_ITEM/DIR_INDEX.
4893  *
4894  * @root:       the root of the fs/file tree
4895  * @ref_key:    the key of the INODE_EXTREF
4896  * @refs:       the count of INODE_EXTREF
4897  * @mode:       the st_mode of INODE_ITEM
4898  *
4899  * Return 0 if no error occurred.
4900  */
4901 static int check_inode_extref(struct btrfs_root *root,
4902                               struct btrfs_key *ref_key,
4903                               struct extent_buffer *node, int slot, u64 *refs,
4904                               int mode)
4905 {
4906         struct btrfs_key key;
4907         struct btrfs_key location;
4908         struct btrfs_inode_extref *extref;
4909         char namebuf[BTRFS_NAME_LEN] = {0};
4910         u32 total;
4911         u32 cur = 0;
4912         u32 len;
4913         u32 name_len;
4914         u64 index;
4915         u64 parent;
4916         int ret;
4917         int err = 0;
4918
4919         location.objectid = ref_key->objectid;
4920         location.type = BTRFS_INODE_ITEM_KEY;
4921         location.offset = 0;
4922
4923         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
4924         total = btrfs_item_size_nr(node, slot);
4925
4926 next:
4927         /* update inode ref count */
4928         (*refs)++;
4929         name_len = btrfs_inode_extref_name_len(node, extref);
4930         index = btrfs_inode_extref_index(node, extref);
4931         parent = btrfs_inode_extref_parent(node, extref);
4932         if (name_len <= BTRFS_NAME_LEN) {
4933                 len = name_len;
4934         } else {
4935                 len = BTRFS_NAME_LEN;
4936                 warning("root %llu INODE_EXTREF[%llu %llu] name too long",
4937                         root->objectid, ref_key->objectid, ref_key->offset);
4938         }
4939         read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
4940
4941         /* Check root dir ref name */
4942         if (index == 0 && strncmp(namebuf, "..", name_len)) {
4943                 error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
4944                       root->objectid, ref_key->objectid, ref_key->offset,
4945                       namebuf);
4946                 err |= ROOT_DIR_ERROR;
4947         }
4948
4949         /* find related dir_index */
4950         key.objectid = parent;
4951         key.type = BTRFS_DIR_INDEX_KEY;
4952         key.offset = index;
4953         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4954         err |= ret;
4955
4956         /* find related dir_item */
4957         key.objectid = parent;
4958         key.type = BTRFS_DIR_ITEM_KEY;
4959         key.offset = btrfs_name_hash(namebuf, len);
4960         ret = find_dir_item(root, &key, &location, namebuf, len, mode);
4961         err |= ret;
4962
4963         len = sizeof(*extref) + name_len;
4964         extref = (struct btrfs_inode_extref *)((char *)extref + len);
4965         cur += len;
4966
4967         if (cur < total)
4968                 goto next;
4969
4970         return err;
4971 }
4972
4973 /*
4974  * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified
4975  * DIR_ITEM/DIR_INDEX match.
4976  * Return with @index_ret.
4977  *
4978  * @root:       the root of the fs/file tree
4979  * @key:        the key of the INODE_REF/INODE_EXTREF
4980  * @name:       the name in the INODE_REF/INODE_EXTREF
4981  * @namelen:    the length of name in the INODE_REF/INODE_EXTREF
4982  * @index_ret:  the index in the INODE_REF/INODE_EXTREF,
4983  *              value (64)-1 means do not check index
4984  * @ext_ref:    the EXTENDED_IREF feature
4985  *
4986  * Return 0 if no error occurred.
4987  * Return >0 for error bitmap
4988  */
4989 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
4990                           char *name, int namelen, u64 *index_ret,
4991                           unsigned int ext_ref)
4992 {
4993         struct btrfs_path path;
4994         struct btrfs_inode_ref *ref;
4995         struct btrfs_inode_extref *extref;
4996         struct extent_buffer *node;
4997         char ref_namebuf[BTRFS_NAME_LEN] = {0};
4998         u32 total;
4999         u32 cur = 0;
5000         u32 len;
5001         u32 ref_namelen;
5002         u64 ref_index;
5003         u64 parent;
5004         u64 dir_id;
5005         int slot;
5006         int ret;
5007
5008         ASSERT(index_ret);
5009
5010         btrfs_init_path(&path);
5011         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5012         if (ret) {
5013                 ret = INODE_REF_MISSING;
5014                 goto extref;
5015         }
5016
5017         node = path.nodes[0];
5018         slot = path.slots[0];
5019
5020         ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
5021         total = btrfs_item_size_nr(node, slot);
5022
5023         /* Iterate all entry of INODE_REF */
5024         while (cur < total) {
5025                 ret = INODE_REF_MISSING;
5026
5027                 ref_namelen = btrfs_inode_ref_name_len(node, ref);
5028                 ref_index = btrfs_inode_ref_index(node, ref);
5029                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5030                         goto next_ref;
5031
5032                 if (cur + sizeof(*ref) + ref_namelen > total ||
5033                     ref_namelen > BTRFS_NAME_LEN) {
5034                         warning("root %llu INODE %s[%llu %llu] name too long",
5035                                 root->objectid,
5036                                 key->type == BTRFS_INODE_REF_KEY ?
5037                                         "REF" : "EXTREF",
5038                                 key->objectid, key->offset);
5039
5040                         if (cur + sizeof(*ref) > total)
5041                                 break;
5042                         len = min_t(u32, total - cur - sizeof(*ref),
5043                                     BTRFS_NAME_LEN);
5044                 } else {
5045                         len = ref_namelen;
5046                 }
5047
5048                 read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
5049                                    len);
5050
5051                 if (len != namelen || strncmp(ref_namebuf, name, len))
5052                         goto next_ref;
5053
5054                 *index_ret = ref_index;
5055                 ret = 0;
5056                 goto out;
5057 next_ref:
5058                 len = sizeof(*ref) + ref_namelen;
5059                 ref = (struct btrfs_inode_ref *)((char *)ref + len);
5060                 cur += len;
5061         }
5062
5063 extref:
5064         /* Skip if not support EXTENDED_IREF feature */
5065         if (!ext_ref)
5066                 goto out;
5067
5068         btrfs_release_path(&path);
5069         btrfs_init_path(&path);
5070
5071         dir_id = key->offset;
5072         key->type = BTRFS_INODE_EXTREF_KEY;
5073         key->offset = btrfs_extref_hash(dir_id, name, namelen);
5074
5075         ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
5076         if (ret) {
5077                 ret = INODE_REF_MISSING;
5078                 goto out;
5079         }
5080
5081         node = path.nodes[0];
5082         slot = path.slots[0];
5083
5084         extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
5085         cur = 0;
5086         total = btrfs_item_size_nr(node, slot);
5087
5088         /* Iterate all entry of INODE_EXTREF */
5089         while (cur < total) {
5090                 ret = INODE_REF_MISSING;
5091
5092                 ref_namelen = btrfs_inode_extref_name_len(node, extref);
5093                 ref_index = btrfs_inode_extref_index(node, extref);
5094                 parent = btrfs_inode_extref_parent(node, extref);
5095                 if (*index_ret != (u64)-1 && *index_ret != ref_index)
5096                         goto next_extref;
5097
5098                 if (parent != dir_id)
5099                         goto next_extref;
5100
5101                 if (ref_namelen <= BTRFS_NAME_LEN) {
5102                         len = ref_namelen;
5103                 } else {
5104                         len = BTRFS_NAME_LEN;
5105                         warning("root %llu INODE %s[%llu %llu] name too long",
5106                                 root->objectid,
5107                                 key->type == BTRFS_INODE_REF_KEY ?
5108                                         "REF" : "EXTREF",
5109                                 key->objectid, key->offset);
5110                 }
5111                 read_extent_buffer(node, ref_namebuf,
5112                                    (unsigned long)(extref + 1), len);
5113
5114                 if (len != namelen || strncmp(ref_namebuf, name, len))
5115                         goto next_extref;
5116
5117                 *index_ret = ref_index;
5118                 ret = 0;
5119                 goto out;
5120
5121 next_extref:
5122                 len = sizeof(*extref) + ref_namelen;
5123                 extref = (struct btrfs_inode_extref *)((char *)extref + len);
5124                 cur += len;
5125
5126         }
5127 out:
5128         btrfs_release_path(&path);
5129         return ret;
5130 }
5131
5132 static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
5133                                u64 ino, u64 index, const char *namebuf,
5134                                int name_len, u8 filetype, int err)
5135 {
5136         if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
5137                 error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
5138                       root->objectid, key->objectid, key->offset, namebuf,
5139                       filetype,
5140                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5141         }
5142
5143         if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
5144                 error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
5145                       root->objectid, key->objectid, index, namebuf, filetype,
5146                       err & DIR_ITEM_MISMATCH ? "mismath" : "missing");
5147         }
5148
5149         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
5150                 error(
5151                 "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
5152                       root->objectid, ino, index, namebuf, filetype,
5153                       err & INODE_ITEM_MISMATCH ? "mismath" : "missing");
5154         }
5155
5156         if (err & INODE_REF_MISSING)
5157                 error(
5158                 "root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
5159                       root->objectid, ino, key->objectid, namebuf, filetype);
5160
5161 }
5162
5163 /*
5164  * Call repair_inode_item_missing and repair_ternary_lowmem to repair
5165  *
5166  * Returns error after repair
5167  */
5168 static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
5169                            u64 index, u8 filetype, char *namebuf, u32 name_len,
5170                            int err)
5171 {
5172         int ret;
5173
5174         if (err & INODE_ITEM_MISSING) {
5175                 ret = repair_inode_item_missing(root, ino, filetype);
5176                 if (!ret)
5177                         err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
5178         }
5179
5180         if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
5181                 ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
5182                                             name_len, filetype, err);
5183                 if (!ret) {
5184                         err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
5185                         err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
5186                         err &= ~(INODE_REF_MISSING);
5187                 }
5188         }
5189         return err;
5190 }
5191
5192 static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type,
5193                 u64 *size_ret)
5194 {
5195         struct btrfs_key key;
5196         struct btrfs_path path;
5197         u32 len;
5198         struct btrfs_dir_item *di;
5199         int ret;
5200         int cur = 0;
5201         int total = 0;
5202
5203         ASSERT(size_ret);
5204         *size_ret = 0;
5205
5206         key.objectid = ino;
5207         key.type = type;
5208         key.offset = (u64)-1;
5209
5210         btrfs_init_path(&path);
5211         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
5212         if (ret < 0) {
5213                 ret = -EIO;
5214                 goto out;
5215         }
5216         /* if found, go to spacial case */
5217         if (ret == 0)
5218                 goto special_case;
5219
5220 loop:
5221         ret = btrfs_previous_item(root, &path, ino, type);
5222
5223         if (ret) {
5224                 ret = 0;
5225                 goto out;
5226         }
5227
5228 special_case:
5229         di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item);
5230         cur = 0;
5231         total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
5232
5233         while (cur < total) {
5234                 len = btrfs_dir_name_len(path.nodes[0], di);
5235                 if (len > BTRFS_NAME_LEN)
5236                         len = BTRFS_NAME_LEN;
5237                 *size_ret += len;
5238
5239                 len += btrfs_dir_data_len(path.nodes[0], di);
5240                 len += sizeof(*di);
5241                 di = (struct btrfs_dir_item *)((char *)di + len);
5242                 cur += len;
5243         }
5244         goto loop;
5245
5246 out:
5247         btrfs_release_path(&path);
5248         return ret;
5249 }
5250
5251 static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
5252 {
5253         u64 item_size;
5254         u64 index_size;
5255         int ret;
5256
5257         ASSERT(size);
5258         ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size);
5259         if (ret)
5260                 goto out;
5261
5262         ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size);
5263         if (ret)
5264                 goto out;
5265
5266         *size = item_size + index_size;
5267
5268 out:
5269         if (ret)
5270                 error("failed to count root %llu INODE[%llu] root size",
5271                       root->objectid, ino);
5272         return ret;
5273 }
5274
5275 /*
5276  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
5277  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
5278  *
5279  * @root:       the root of the fs/file tree
5280  * @di_key:     the key of the DIR_ITEM/DIR_INDEX
5281  * @path:       the path
5282  * @size:       the st_size of the INODE_ITEM
5283  * @ext_ref:    the EXTENDED_IREF feature
5284  *
5285  * Return 0 if no error occurred.
5286  * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
5287  */
5288 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key,
5289                           struct btrfs_path *path, u64 *size,
5290                           unsigned int ext_ref)
5291 {
5292         struct btrfs_dir_item *di;
5293         struct btrfs_inode_item *ii;
5294         struct btrfs_key key;
5295         struct btrfs_key location;
5296         struct extent_buffer *node;
5297         int slot;
5298         char namebuf[BTRFS_NAME_LEN] = {0};
5299         u32 total;
5300         u32 cur = 0;
5301         u32 len;
5302         u32 name_len;
5303         u32 data_len;
5304         u8 filetype;
5305         u32 mode = 0;
5306         u64 index;
5307         int ret;
5308         int err;
5309         int tmp_err;
5310         int need_research = 0;
5311
5312         /*
5313          * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
5314          * ignore index check.
5315          */
5316         if (di_key->type == BTRFS_DIR_INDEX_KEY)
5317                 index = di_key->offset;
5318         else
5319                 index = (u64)-1;
5320 begin:
5321         err = 0;
5322         cur = 0;
5323
5324         /* since after repair, path and the dir item may be changed */
5325         if (need_research) {
5326                 need_research = 0;
5327                 err |= DIR_COUNT_AGAIN;
5328                 btrfs_release_path(path);
5329                 ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5330                 /* the item was deleted, let path point the last checked item */
5331                 if (ret > 0) {
5332                         if (path->slots[0] == 0)
5333                                 btrfs_prev_leaf(root, path);
5334                         else
5335                                 path->slots[0]--;
5336                 }
5337                 if (ret)
5338                         goto out;
5339         }
5340
5341         node = path->nodes[0];
5342         slot = path->slots[0];
5343
5344         di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
5345         total = btrfs_item_size_nr(node, slot);
5346         memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
5347
5348         while (cur < total) {
5349                 data_len = btrfs_dir_data_len(node, di);
5350                 tmp_err = 0;
5351                 if (data_len)
5352                         error("root %llu %s[%llu %llu] data_len shouldn't be %u",
5353                               root->objectid,
5354               di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5355                               di_key->objectid, di_key->offset, data_len);
5356
5357                 name_len = btrfs_dir_name_len(node, di);
5358                 if (name_len <= BTRFS_NAME_LEN) {
5359                         len = name_len;
5360                 } else {
5361                         len = BTRFS_NAME_LEN;
5362                         warning("root %llu %s[%llu %llu] name too long",
5363                                 root->objectid,
5364                 di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX",
5365                                 di_key->objectid, di_key->offset);
5366                 }
5367                 (*size) += name_len;
5368                 read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
5369                                    len);
5370                 filetype = btrfs_dir_type(node, di);
5371
5372                 if (di_key->type == BTRFS_DIR_ITEM_KEY &&
5373                     di_key->offset != btrfs_name_hash(namebuf, len)) {
5374                         err |= -EIO;
5375                         error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
5376                         root->objectid, di_key->objectid, di_key->offset,
5377                         namebuf, len, filetype, di_key->offset,
5378                         btrfs_name_hash(namebuf, len));
5379                 }
5380
5381                 btrfs_dir_item_key_to_cpu(node, di, &location);
5382                 /* Ignore related ROOT_ITEM check */
5383                 if (location.type == BTRFS_ROOT_ITEM_KEY)
5384                         goto next;
5385
5386                 btrfs_release_path(path);
5387                 /* Check relative INODE_ITEM(existence/filetype) */
5388                 ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
5389                 if (ret) {
5390                         tmp_err |= INODE_ITEM_MISSING;
5391                         goto next;
5392                 }
5393
5394                 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5395                                     struct btrfs_inode_item);
5396                 mode = btrfs_inode_mode(path->nodes[0], ii);
5397                 if (imode_to_type(mode) != filetype) {
5398                         tmp_err |= INODE_ITEM_MISMATCH;
5399                         goto next;
5400                 }
5401
5402                 /* Check relative INODE_REF/INODE_EXTREF */
5403                 key.objectid = location.objectid;
5404                 key.type = BTRFS_INODE_REF_KEY;
5405                 key.offset = di_key->objectid;
5406                 tmp_err |= find_inode_ref(root, &key, namebuf, len,
5407                                           &index, ext_ref);
5408
5409                 /* check relative INDEX/ITEM */
5410                 key.objectid = di_key->objectid;
5411                 if (key.type == BTRFS_DIR_ITEM_KEY) {
5412                         key.type = BTRFS_DIR_INDEX_KEY;
5413                         key.offset = index;
5414                 } else {
5415                         key.type = BTRFS_DIR_ITEM_KEY;
5416                         key.offset = btrfs_name_hash(namebuf, name_len);
5417                 }
5418
5419                 tmp_err |= find_dir_item(root, &key, &location, namebuf,
5420                                          name_len, filetype);
5421                 /* find_dir_item may find index */
5422                 if (key.type == BTRFS_DIR_INDEX_KEY)
5423                         index = key.offset;
5424 next:
5425
5426                 if (tmp_err && repair) {
5427                         ret = repair_dir_item(root, di_key->objectid,
5428                                               location.objectid, index,
5429                                               imode_to_type(mode), namebuf,
5430                                               name_len, tmp_err);
5431                         if (ret != tmp_err) {
5432                                 need_research = 1;
5433                                 goto begin;
5434                         }
5435                 }
5436                 btrfs_release_path(path);
5437                 print_dir_item_err(root, di_key, location.objectid, index,
5438                                    namebuf, name_len, filetype, tmp_err);
5439                 err |= tmp_err;
5440                 len = sizeof(*di) + name_len + data_len;
5441                 di = (struct btrfs_dir_item *)((char *)di + len);
5442                 cur += len;
5443
5444                 if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) {
5445                         error("root %llu DIR_INDEX[%llu %llu] should contain only one entry",
5446                               root->objectid, di_key->objectid,
5447                               di_key->offset);
5448                         break;
5449                 }
5450         }
5451 out:
5452         /* research path */
5453         btrfs_release_path(path);
5454         ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0);
5455         if (ret)
5456                 err |= ret > 0 ? -ENOENT : ret;
5457         return err;
5458 }
5459
5460 /*
5461  * Wrapper function of btrfs_punch_hole.
5462  *
5463  * Returns 0 means success.
5464  * Returns not 0 means error.
5465  */
5466 static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
5467                              u64 len)
5468 {
5469         struct btrfs_trans_handle *trans;
5470         int ret = 0;
5471
5472         trans = btrfs_start_transaction(root, 1);
5473         if (IS_ERR(trans))
5474                 return PTR_ERR(trans);
5475
5476         ret = btrfs_punch_hole(trans, root, ino, start, len);
5477         if (ret)
5478                 error("failed to add hole [%llu, %llu] in inode [%llu]",
5479                       start, len, ino);
5480         else
5481                 printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len,
5482                        ino);
5483
5484         btrfs_commit_transaction(trans, root);
5485         return ret;
5486 }
5487
5488 /*
5489  * Check file extent datasum/hole, update the size of the file extents,
5490  * check and update the last offset of the file extent.
5491  *
5492  * @root:       the root of fs/file tree.
5493  * @fkey:       the key of the file extent.
5494  * @nodatasum:  INODE_NODATASUM feature.
5495  * @size:       the sum of all EXTENT_DATA items size for this inode.
5496  * @end:        the offset of the last extent.
5497  *
5498  * Return 0 if no error occurred.
5499  */
5500 static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey,
5501                              struct extent_buffer *node, int slot,
5502                              unsigned int nodatasum, u64 *size, u64 *end)
5503 {
5504         struct btrfs_file_extent_item *fi;
5505         u64 disk_bytenr;
5506         u64 disk_num_bytes;
5507         u64 extent_num_bytes;
5508         u64 extent_offset;
5509         u64 csum_found;         /* In byte size, sectorsize aligned */
5510         u64 search_start;       /* Logical range start we search for csum */
5511         u64 search_len;         /* Logical range len we search for csum */
5512         unsigned int extent_type;
5513         unsigned int is_hole;
5514         int compressed = 0;
5515         int ret;
5516         int err = 0;
5517
5518         fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
5519
5520         /* Check inline extent */
5521         extent_type = btrfs_file_extent_type(node, fi);
5522         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
5523                 struct btrfs_item *e = btrfs_item_nr(slot);
5524                 u32 item_inline_len;
5525
5526                 item_inline_len = btrfs_file_extent_inline_item_len(node, e);
5527                 extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi);
5528                 compressed = btrfs_file_extent_compression(node, fi);
5529                 if (extent_num_bytes == 0) {
5530                         error(
5531                 "root %llu EXTENT_DATA[%llu %llu] has empty inline extent",
5532                                 root->objectid, fkey->objectid, fkey->offset);
5533                         err |= FILE_EXTENT_ERROR;
5534                 }
5535                 if (!compressed && extent_num_bytes != item_inline_len) {
5536                         error(
5537                 "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u",
5538                                 root->objectid, fkey->objectid, fkey->offset,
5539                                 extent_num_bytes, item_inline_len);
5540                         err |= FILE_EXTENT_ERROR;
5541                 }
5542                 *end += extent_num_bytes;
5543                 *size += extent_num_bytes;
5544                 return err;
5545         }
5546
5547         /* Check extent type */
5548         if (extent_type != BTRFS_FILE_EXTENT_REG &&
5549                         extent_type != BTRFS_FILE_EXTENT_PREALLOC) {
5550                 err |= FILE_EXTENT_ERROR;
5551                 error("root %llu EXTENT_DATA[%llu %llu] type bad",
5552                       root->objectid, fkey->objectid, fkey->offset);
5553                 return err;
5554         }
5555
5556         /* Check REG_EXTENT/PREALLOC_EXTENT */
5557         disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi);
5558         disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi);
5559         extent_num_bytes = btrfs_file_extent_num_bytes(node, fi);
5560         extent_offset = btrfs_file_extent_offset(node, fi);
5561         compressed = btrfs_file_extent_compression(node, fi);
5562         is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0);
5563
5564         /*
5565          * Check EXTENT_DATA csum
5566          *
5567          * For plain (uncompressed) extent, we should only check the range
5568          * we're referring to, as it's possible that part of prealloc extent
5569          * has been written, and has csum:
5570          *
5571          * |<--- Original large preallocated extent A ---->|
5572          * |<- Prealloc File Extent ->|<- Regular Extent ->|
5573          *      No csum                         Has csum
5574          *
5575          * For compressed extent, we should check the whole range.
5576          */
5577         if (!compressed) {
5578                 search_start = disk_bytenr + extent_offset;
5579                 search_len = extent_num_bytes;
5580         } else {
5581                 search_start = disk_bytenr;
5582                 search_len = disk_num_bytes;
5583         }
5584         ret = count_csum_range(root, search_start, search_len, &csum_found);
5585         if (csum_found > 0 && nodatasum) {
5586                 err |= ODD_CSUM_ITEM;
5587                 error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum",
5588                       root->objectid, fkey->objectid, fkey->offset);
5589         } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum &&
5590                    !is_hole && (ret < 0 || csum_found < search_len)) {
5591                 err |= CSUM_ITEM_MISSING;
5592                 error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu",
5593                       root->objectid, fkey->objectid, fkey->offset,
5594                       csum_found, search_len);
5595         } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) {
5596                 err |= ODD_CSUM_ITEM;
5597                 error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu",
5598                       root->objectid, fkey->objectid, fkey->offset, csum_found);
5599         }
5600
5601         /* Check EXTENT_DATA hole */
5602         if (!no_holes && *end != fkey->offset) {
5603                 if (repair)
5604                         ret = punch_extent_hole(root, fkey->objectid,
5605                                                 *end, fkey->offset - *end);
5606                 if (!repair || ret) {
5607                         err |= FILE_EXTENT_ERROR;
5608                         error(
5609 "root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]",
5610                                 root->objectid, fkey->objectid, fkey->offset,
5611                                 fkey->objectid, *end);
5612                 }
5613         }
5614
5615         *end += extent_num_bytes;
5616         if (!is_hole)
5617                 *size += extent_num_bytes;
5618
5619         return err;
5620 }
5621
5622 /*
5623  * Set inode item nbytes to @nbytes
5624  *
5625  * Returns  0     on success
5626  * Returns  != 0  on error
5627  */
5628 static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
5629                                       struct btrfs_path *path,
5630                                       u64 ino, u64 nbytes)
5631 {
5632         struct btrfs_trans_handle *trans;
5633         struct btrfs_inode_item *ii;
5634         struct btrfs_key key;
5635         struct btrfs_key research_key;
5636         int err = 0;
5637         int ret;
5638
5639         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5640
5641         key.objectid = ino;
5642         key.type = BTRFS_INODE_ITEM_KEY;
5643         key.offset = 0;
5644
5645         trans = btrfs_start_transaction(root, 1);
5646         if (IS_ERR(trans)) {
5647                 ret = PTR_ERR(trans);
5648                 err |= ret;
5649                 goto out;
5650         }
5651
5652         btrfs_release_path(path);
5653         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5654         if (ret > 0)
5655                 ret = -ENOENT;
5656         if (ret) {
5657                 err |= ret;
5658                 goto fail;
5659         }
5660
5661         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5662                             struct btrfs_inode_item);
5663         btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
5664         btrfs_mark_buffer_dirty(path->nodes[0]);
5665 fail:
5666         btrfs_commit_transaction(trans, root);
5667 out:
5668         if (ret)
5669                 error("failed to set nbytes in inode %llu root %llu",
5670                       ino, root->root_key.objectid);
5671         else
5672                 printf("Set nbytes in inode item %llu root %llu\n to %llu", ino,
5673                        root->root_key.objectid, nbytes);
5674
5675         /* research path */
5676         btrfs_release_path(path);
5677         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5678         err |= ret;
5679
5680         return err;
5681 }
5682
5683 /*
5684  * Set directory inode isize to @isize.
5685  *
5686  * Returns 0     on success.
5687  * Returns != 0  on error.
5688  */
5689 static int repair_dir_isize_lowmem(struct btrfs_root *root,
5690                                    struct btrfs_path *path,
5691                                    u64 ino, u64 isize)
5692 {
5693         struct btrfs_trans_handle *trans;
5694         struct btrfs_inode_item *ii;
5695         struct btrfs_key key;
5696         struct btrfs_key research_key;
5697         int ret;
5698         int err = 0;
5699
5700         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5701
5702         key.objectid = ino;
5703         key.type = BTRFS_INODE_ITEM_KEY;
5704         key.offset = 0;
5705
5706         trans = btrfs_start_transaction(root, 1);
5707         if (IS_ERR(trans)) {
5708                 ret = PTR_ERR(trans);
5709                 err |= ret;
5710                 goto out;
5711         }
5712
5713         btrfs_release_path(path);
5714         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5715         if (ret > 0)
5716                 ret = -ENOENT;
5717         if (ret) {
5718                 err |= ret;
5719                 goto fail;
5720         }
5721
5722         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5723                             struct btrfs_inode_item);
5724         btrfs_set_inode_size(path->nodes[0], ii, isize);
5725         btrfs_mark_buffer_dirty(path->nodes[0]);
5726 fail:
5727         btrfs_commit_transaction(trans, root);
5728 out:
5729         if (ret)
5730                 error("failed to set isize in inode %llu root %llu",
5731                       ino, root->root_key.objectid);
5732         else
5733                 printf("Set isize in inode %llu root %llu to %llu\n",
5734                        ino, root->root_key.objectid, isize);
5735
5736         btrfs_release_path(path);
5737         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5738         err |= ret;
5739
5740         return err;
5741 }
5742
5743 /*
5744  * Wrapper function for btrfs_add_orphan_item().
5745  *
5746  * Returns 0     on success.
5747  * Returns != 0  on error.
5748  */
5749 static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
5750                                            struct btrfs_path *path, u64 ino)
5751 {
5752         struct btrfs_trans_handle *trans;
5753         struct btrfs_key research_key;
5754         int ret;
5755         int err = 0;
5756
5757         btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
5758
5759         trans = btrfs_start_transaction(root, 1);
5760         if (IS_ERR(trans)) {
5761                 ret = PTR_ERR(trans);
5762                 err |= ret;
5763                 goto out;
5764         }
5765
5766         btrfs_release_path(path);
5767         ret = btrfs_add_orphan_item(trans, root, path, ino);
5768         err |= ret;
5769         btrfs_commit_transaction(trans, root);
5770 out:
5771         if (ret)
5772                 error("failed to add inode %llu as orphan item root %llu",
5773                       ino, root->root_key.objectid);
5774         else
5775                 printf("Added inode %llu as orphan item root %llu\n",
5776                        ino, root->root_key.objectid);
5777
5778         btrfs_release_path(path);
5779         ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
5780         err |= ret;
5781
5782         return err;
5783 }
5784
5785 /* Set inode_item nlink to @ref_count.
5786  * If @ref_count == 0, move it to "lost+found" and increase @ref_count.
5787  *
5788  * Returns 0 on success
5789  */
5790 static int repair_inode_nlinks_lowmem(struct btrfs_root *root,
5791                                       struct btrfs_path *path, u64 ino,
5792                                       const char *name, u32 namelen,
5793                                       u64 ref_count, u8 filetype, u64 *nlink)
5794 {
5795         struct btrfs_trans_handle *trans;
5796         struct btrfs_inode_item *ii;
5797         struct btrfs_key key;
5798         struct btrfs_key old_key;
5799         char namebuf[BTRFS_NAME_LEN] = {0};
5800         int name_len;
5801         int ret;
5802         int ret2;
5803
5804         /* save the key */
5805         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
5806
5807         if (name && namelen) {
5808                 ASSERT(namelen <= BTRFS_NAME_LEN);
5809                 memcpy(namebuf, name, namelen);
5810                 name_len = namelen;
5811         } else {
5812                 sprintf(namebuf, "%llu", ino);
5813                 name_len = count_digits(ino);
5814                 printf("Can't find file name for inode %llu, use %s instead\n",
5815                        ino, namebuf);
5816         }
5817
5818         trans = btrfs_start_transaction(root, 1);
5819         if (IS_ERR(trans)) {
5820                 ret = PTR_ERR(trans);
5821                 goto out;
5822         }
5823
5824         btrfs_release_path(path);
5825         /* if refs is 0, put it into lostfound */
5826         if (ref_count == 0) {
5827                 ret = link_inode_to_lostfound(trans, root, path, ino, namebuf,
5828                                               name_len, filetype, &ref_count);
5829                 if (ret)
5830                         goto fail;
5831         }
5832
5833         /* reset inode_item's nlink to ref_count */
5834         key.objectid = ino;
5835         key.type = BTRFS_INODE_ITEM_KEY;
5836         key.offset = 0;
5837
5838         btrfs_release_path(path);
5839         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
5840         if (ret > 0)
5841                 ret = -ENOENT;
5842         if (ret)
5843                 goto fail;
5844
5845         ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
5846                             struct btrfs_inode_item);
5847         btrfs_set_inode_nlink(path->nodes[0], ii, ref_count);
5848         btrfs_mark_buffer_dirty(path->nodes[0]);
5849
5850         if (nlink)
5851                 *nlink = ref_count;
5852 fail:
5853         btrfs_commit_transaction(trans, root);
5854 out:
5855         if (ret)
5856                 error(
5857         "fail to repair nlink of inode %llu root %llu name %s filetype %u",
5858                        root->objectid, ino, namebuf, filetype);
5859         else
5860                 printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n",
5861                        root->objectid, ino, namebuf, filetype);
5862
5863         /* research */
5864         btrfs_release_path(path);
5865         ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
5866         if (ret2 < 0)
5867                 return ret |= ret2;
5868         return ret;
5869 }
5870
5871 /*
5872  * Check INODE_ITEM and related ITEMs (the same inode number)
5873  * 1. check link count
5874  * 2. check inode ref/extref
5875  * 3. check dir item/index
5876  *
5877  * @ext_ref:    the EXTENDED_IREF feature
5878  *
5879  * Return 0 if no error occurred.
5880  * Return >0 for error or hit the traversal is done(by error bitmap)
5881  */
5882 static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
5883                             unsigned int ext_ref)
5884 {
5885         struct extent_buffer *node;
5886         struct btrfs_inode_item *ii;
5887         struct btrfs_key key;
5888         struct btrfs_key last_key;
5889         u64 inode_id;
5890         u32 mode;
5891         u64 nlink;
5892         u64 nbytes;
5893         u64 isize;
5894         u64 size = 0;
5895         u64 refs = 0;
5896         u64 extent_end = 0;
5897         u64 extent_size = 0;
5898         unsigned int dir;
5899         unsigned int nodatasum;
5900         int slot;
5901         int ret;
5902         int err = 0;
5903         char namebuf[BTRFS_NAME_LEN] = {0};
5904         u32 name_len = 0;
5905
5906         node = path->nodes[0];
5907         slot = path->slots[0];
5908
5909         btrfs_item_key_to_cpu(node, &key, slot);
5910         inode_id = key.objectid;
5911
5912         if (inode_id == BTRFS_ORPHAN_OBJECTID) {
5913                 ret = btrfs_next_item(root, path);
5914                 if (ret > 0)
5915                         err |= LAST_ITEM;
5916                 return err;
5917         }
5918
5919         ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item);
5920         isize = btrfs_inode_size(node, ii);
5921         nbytes = btrfs_inode_nbytes(node, ii);
5922         mode = btrfs_inode_mode(node, ii);
5923         dir = imode_to_type(mode) == BTRFS_FT_DIR;
5924         nlink = btrfs_inode_nlink(node, ii);
5925         nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM;
5926
5927         while (1) {
5928                 btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]);
5929                 ret = btrfs_next_item(root, path);
5930                 if (ret < 0) {
5931                         /* out will fill 'err' rusing current statistics */
5932                         goto out;
5933                 } else if (ret > 0) {
5934                         err |= LAST_ITEM;
5935                         goto out;
5936                 }
5937
5938                 node = path->nodes[0];
5939                 slot = path->slots[0];
5940                 btrfs_item_key_to_cpu(node, &key, slot);
5941                 if (key.objectid != inode_id)
5942                         goto out;
5943
5944                 switch (key.type) {
5945                 case BTRFS_INODE_REF_KEY:
5946                         ret = check_inode_ref(root, &key, path, namebuf,
5947                                               &name_len, &refs, mode);
5948                         err |= ret;
5949                         break;
5950                 case BTRFS_INODE_EXTREF_KEY:
5951                         if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref)
5952                                 warning("root %llu EXTREF[%llu %llu] isn't supported",
5953                                         root->objectid, key.objectid,
5954                                         key.offset);
5955                         ret = check_inode_extref(root, &key, node, slot, &refs,
5956                                                  mode);
5957                         err |= ret;
5958                         break;
5959                 case BTRFS_DIR_ITEM_KEY:
5960                 case BTRFS_DIR_INDEX_KEY:
5961                         if (!dir) {
5962                                 warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]",
5963                                         root->objectid, inode_id,
5964                                         imode_to_type(mode), key.objectid,
5965                                         key.offset);
5966                         }
5967                         ret = check_dir_item(root, &key, path, &size, ext_ref);
5968                         err |= ret;
5969                         break;
5970                 case BTRFS_EXTENT_DATA_KEY:
5971                         if (dir) {
5972                                 warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]",
5973                                         root->objectid, inode_id, key.objectid,
5974                                         key.offset);
5975                         }
5976                         ret = check_file_extent(root, &key, node, slot,
5977                                                 nodatasum, &extent_size,
5978                                                 &extent_end);
5979                         err |= ret;
5980                         break;
5981                 case BTRFS_XATTR_ITEM_KEY:
5982                         break;
5983                 default:
5984                         error("ITEM[%llu %u %llu] UNKNOWN TYPE",
5985                               key.objectid, key.type, key.offset);
5986                 }
5987         }
5988
5989 out:
5990         if (err & LAST_ITEM) {
5991                 btrfs_release_path(path);
5992                 ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0);
5993                 if (ret)
5994                         return err;
5995         }
5996
5997         /* verify INODE_ITEM nlink/isize/nbytes */
5998         if (dir) {
5999                 if (repair && (err & DIR_COUNT_AGAIN)) {
6000                         err &= ~DIR_COUNT_AGAIN;
6001                         count_dir_isize(root, inode_id, &size);
6002                 }
6003
6004                 if ((nlink != 1 || refs != 1) && repair) {
6005                         ret = repair_inode_nlinks_lowmem(root, path, inode_id,
6006                                 namebuf, name_len, refs, imode_to_type(mode),
6007                                 &nlink);
6008                 }
6009
6010                 if (nlink != 1) {
6011                         err |= LINK_COUNT_ERROR;
6012                         error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)",
6013                               root->objectid, inode_id, nlink);
6014                 }
6015
6016                 /*
6017                  * Just a warning, as dir inode nbytes is just an
6018                  * instructive value.
6019                  */
6020                 if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) {
6021                         warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u",
6022                                 root->objectid, inode_id,
6023                                 root->fs_info->nodesize);
6024                 }
6025
6026                 if (isize != size) {
6027                         if (repair)
6028                                 ret = repair_dir_isize_lowmem(root, path,
6029                                                               inode_id, size);
6030                         if (!repair || ret) {
6031                                 err |= ISIZE_ERROR;
6032                                 error(
6033                 "root %llu DIR INODE [%llu] size %llu not equal to %llu",
6034                                       root->objectid, inode_id, isize, size);
6035                         }
6036                 }
6037         } else {
6038                 if (nlink != refs) {
6039                         if (repair)
6040                                 ret = repair_inode_nlinks_lowmem(root, path,
6041                                          inode_id, namebuf, name_len, refs,
6042                                          imode_to_type(mode), &nlink);
6043                         if (!repair || ret) {
6044                                 err |= LINK_COUNT_ERROR;
6045                                 error(
6046                 "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)",
6047                                       root->objectid, inode_id, nlink, refs);
6048                         }
6049                 } else if (!nlink) {
6050                         if (repair)
6051                                 ret = repair_inode_orphan_item_lowmem(root,
6052                                                               path, inode_id);
6053                         if (!repair || ret) {
6054                                 err |= ORPHAN_ITEM;
6055                                 error("root %llu INODE[%llu] is orphan item",
6056                                       root->objectid, inode_id);
6057                         }
6058                 }
6059
6060                 if (!nbytes && !no_holes && extent_end < isize) {
6061                         if (repair)
6062                                 ret = punch_extent_hole(root, inode_id,
6063                                                 extent_end, isize - extent_end);
6064                         if (!repair || ret) {
6065                                 err |= NBYTES_ERROR;
6066                                 error(
6067         "root %llu INODE[%llu] size %llu should have a file extent hole",
6068                                       root->objectid, inode_id, isize);
6069                         }
6070                 }
6071
6072                 if (nbytes != extent_size) {
6073                         if (repair)
6074                                 ret = repair_inode_nbytes_lowmem(root, path,
6075                                                          inode_id, extent_size);
6076                         if (!repair || ret) {
6077                                 err |= NBYTES_ERROR;
6078                                 error(
6079         "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu",
6080                                       root->objectid, inode_id, nbytes,
6081                                       extent_size);
6082                         }
6083                 }
6084         }
6085
6086         if (err & LAST_ITEM)
6087                 btrfs_next_item(root, path);
6088         return err;
6089 }
6090
6091 /*
6092  * Insert the missing inode item and inode ref.
6093  *
6094  * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
6095  * Root dir should be handled specially because root dir is the root of fs.
6096  *
6097  * returns err (>0 or 0) after repair
6098  */
6099 static int repair_fs_first_inode(struct btrfs_root *root, int err)
6100 {
6101         struct btrfs_trans_handle *trans;
6102         struct btrfs_key key;
6103         struct btrfs_path path;
6104         int filetype = BTRFS_FT_DIR;
6105         int ret = 0;
6106
6107         btrfs_init_path(&path);
6108
6109         if (err & INODE_REF_MISSING) {
6110                 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6111                 key.type = BTRFS_INODE_REF_KEY;
6112                 key.offset = BTRFS_FIRST_FREE_OBJECTID;
6113
6114                 trans = btrfs_start_transaction(root, 1);
6115                 if (IS_ERR(trans)) {
6116                         ret = PTR_ERR(trans);
6117                         goto out;
6118                 }
6119
6120                 btrfs_release_path(&path);
6121                 ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
6122                 if (ret)
6123                         goto trans_fail;
6124
6125                 ret = btrfs_insert_inode_ref(trans, root, "..", 2,
6126                                              BTRFS_FIRST_FREE_OBJECTID,
6127                                              BTRFS_FIRST_FREE_OBJECTID, 0);
6128                 if (ret)
6129                         goto trans_fail;
6130
6131                 printf("Add INODE_REF[%llu %llu] name %s\n",
6132                        BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
6133                        "..");
6134                 err &= ~INODE_REF_MISSING;
6135 trans_fail:
6136                 if (ret)
6137                         error("fail to insert first inode's ref");
6138                 btrfs_commit_transaction(trans, root);
6139         }
6140
6141         if (err & INODE_ITEM_MISSING) {
6142                 ret = repair_inode_item_missing(root,
6143                                         BTRFS_FIRST_FREE_OBJECTID, filetype);
6144                 if (ret)
6145                         goto out;
6146                 err &= ~INODE_ITEM_MISSING;
6147         }
6148 out:
6149         if (ret)
6150                 error("fail to repair first inode");
6151         btrfs_release_path(&path);
6152         return err;
6153 }
6154
6155 /*
6156  * check first root dir's inode_item and inode_ref
6157  *
6158  * returns 0 means no error
6159  * returns >0 means error
6160  * returns <0 means fatal error
6161  */
6162 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
6163 {
6164         struct btrfs_path path;
6165         struct btrfs_key key;
6166         struct btrfs_inode_item *ii;
6167         u64 index;
6168         u32 mode;
6169         int err = 0;
6170         int ret;
6171
6172         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6173         key.type = BTRFS_INODE_ITEM_KEY;
6174         key.offset = 0;
6175
6176         /* For root being dropped, we don't need to check first inode */
6177         if (btrfs_root_refs(&root->root_item) == 0 &&
6178             btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
6179             BTRFS_FIRST_FREE_OBJECTID)
6180                 return 0;
6181
6182         btrfs_init_path(&path);
6183         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6184         if (ret < 0)
6185                 goto out;
6186         if (ret > 0) {
6187                 ret = 0;
6188                 err |= INODE_ITEM_MISSING;
6189         } else {
6190                 ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
6191                                     struct btrfs_inode_item);
6192                 mode = btrfs_inode_mode(path.nodes[0], ii);
6193                 if (imode_to_type(mode) != BTRFS_FT_DIR)
6194                         err |= INODE_ITEM_MISMATCH;
6195         }
6196
6197         /* lookup first inode ref */
6198         key.offset = BTRFS_FIRST_FREE_OBJECTID;
6199         key.type = BTRFS_INODE_REF_KEY;
6200         /* special index value */
6201         index = 0;
6202
6203         ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
6204         if (ret < 0)
6205                 goto out;
6206         err |= ret;
6207
6208 out:
6209         btrfs_release_path(&path);
6210
6211         if (err && repair)
6212                 err = repair_fs_first_inode(root, err);
6213
6214         if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
6215                 error("root dir INODE_ITEM is %s",
6216                       err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
6217         if (err & INODE_REF_MISSING)
6218                 error("root dir INODE_REF is missing");
6219
6220         return ret < 0 ? ret : err;
6221 }
6222
6223 static struct tree_backref *find_tree_backref(struct extent_record *rec,
6224                                                 u64 parent, u64 root)
6225 {
6226         struct rb_node *node;
6227         struct tree_backref *back = NULL;
6228         struct tree_backref match = {
6229                 .node = {
6230                         .is_data = 0,
6231                 },
6232         };
6233
6234         if (parent) {
6235                 match.parent = parent;
6236                 match.node.full_backref = 1;
6237         } else {
6238                 match.root = root;
6239         }
6240
6241         node = rb_search(&rec->backref_tree, &match.node.node,
6242                          (rb_compare_keys)compare_extent_backref, NULL);
6243         if (node)
6244                 back = to_tree_backref(rb_node_to_extent_backref(node));
6245
6246         return back;
6247 }
6248
6249 static struct data_backref *find_data_backref(struct extent_record *rec,
6250                                                 u64 parent, u64 root,
6251                                                 u64 owner, u64 offset,
6252                                                 int found_ref,
6253                                                 u64 disk_bytenr, u64 bytes)
6254 {
6255         struct rb_node *node;
6256         struct data_backref *back = NULL;
6257         struct data_backref match = {
6258                 .node = {
6259                         .is_data = 1,
6260                 },
6261                 .owner = owner,
6262                 .offset = offset,
6263                 .bytes = bytes,
6264                 .found_ref = found_ref,
6265                 .disk_bytenr = disk_bytenr,
6266         };
6267
6268         if (parent) {
6269                 match.parent = parent;
6270                 match.node.full_backref = 1;
6271         } else {
6272                 match.root = root;
6273         }
6274
6275         node = rb_search(&rec->backref_tree, &match.node.node,
6276                          (rb_compare_keys)compare_extent_backref, NULL);
6277         if (node)
6278                 back = to_data_backref(rb_node_to_extent_backref(node));
6279
6280         return back;
6281 }
6282 /*
6283  * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
6284  * blocks and integrity of fs tree items.
6285  *
6286  * @root:         the root of the tree to be checked.
6287  * @ext_ref       feature EXTENDED_IREF is enable or not.
6288  * @account       if NOT 0 means check the tree (including tree)'s treeblocks.
6289  *                otherwise means check fs tree(s) items relationship and
6290  *                @root MUST be a fs tree root.
6291  * Returns 0      represents OK.
6292  * Returns not 0  represents error.
6293  */
6294 static int check_btrfs_root(struct btrfs_trans_handle *trans,
6295                             struct btrfs_root *root, unsigned int ext_ref,
6296                             int check_all)
6297
6298 {
6299         struct btrfs_path path;
6300         struct node_refs nrefs;
6301         struct btrfs_root_item *root_item = &root->root_item;
6302         int ret;
6303         int level;
6304         int err = 0;
6305
6306         memset(&nrefs, 0, sizeof(nrefs));
6307         if (!check_all) {
6308                 /*
6309                  * We need to manually check the first inode item (256)
6310                  * As the following traversal function will only start from
6311                  * the first inode item in the leaf, if inode item (256) is
6312                  * missing we will skip it forever.
6313                  */
6314                 ret = check_fs_first_inode(root, ext_ref);
6315                 if (ret < 0)
6316                         return ret;
6317         }
6318
6319
6320         level = btrfs_header_level(root->node);
6321         btrfs_init_path(&path);
6322
6323         if (btrfs_root_refs(root_item) > 0 ||
6324             btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6325                 path.nodes[level] = root->node;
6326                 path.slots[level] = 0;
6327                 extent_buffer_get(root->node);
6328         } else {
6329                 struct btrfs_key key;
6330
6331                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6332                 level = root_item->drop_level;
6333                 path.lowest_level = level;
6334                 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6335                 if (ret < 0)
6336                         goto out;
6337                 ret = 0;
6338         }
6339
6340         while (1) {
6341                 ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
6342                                         ext_ref, check_all);
6343
6344                 err |= !!ret;
6345
6346                 /* if ret is negative, walk shall stop */
6347                 if (ret < 0) {
6348                         ret = err;
6349                         break;
6350                 }
6351
6352                 ret = walk_up_tree_v2(root, &path, &level);
6353                 if (ret != 0) {
6354                         /* Normal exit, reset ret to err */
6355                         ret = err;
6356                         break;
6357                 }
6358         }
6359
6360 out:
6361         btrfs_release_path(&path);
6362         return ret;
6363 }
6364
6365 /*
6366  * Iterate all items in the tree and call check_inode_item() to check.
6367  *
6368  * @root:       the root of the tree to be checked.
6369  * @ext_ref:    the EXTENDED_IREF feature
6370  *
6371  * Return 0 if no error found.
6372  * Return <0 for error.
6373  */
6374 static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
6375 {
6376         reset_cached_block_groups(root->fs_info);
6377         return check_btrfs_root(NULL, root, ext_ref, 0);
6378 }
6379
6380 /*
6381  * Find the relative ref for root_ref and root_backref.
6382  *
6383  * @root:       the root of the root tree.
6384  * @ref_key:    the key of the root ref.
6385  *
6386  * Return 0 if no error occurred.
6387  */
6388 static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
6389                           struct extent_buffer *node, int slot)
6390 {
6391         struct btrfs_path path;
6392         struct btrfs_key key;
6393         struct btrfs_root_ref *ref;
6394         struct btrfs_root_ref *backref;
6395         char ref_name[BTRFS_NAME_LEN] = {0};
6396         char backref_name[BTRFS_NAME_LEN] = {0};
6397         u64 ref_dirid;
6398         u64 ref_seq;
6399         u32 ref_namelen;
6400         u64 backref_dirid;
6401         u64 backref_seq;
6402         u32 backref_namelen;
6403         u32 len;
6404         int ret;
6405         int err = 0;
6406
6407         ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
6408         ref_dirid = btrfs_root_ref_dirid(node, ref);
6409         ref_seq = btrfs_root_ref_sequence(node, ref);
6410         ref_namelen = btrfs_root_ref_name_len(node, ref);
6411
6412         if (ref_namelen <= BTRFS_NAME_LEN) {
6413                 len = ref_namelen;
6414         } else {
6415                 len = BTRFS_NAME_LEN;
6416                 warning("%s[%llu %llu] ref_name too long",
6417                         ref_key->type == BTRFS_ROOT_REF_KEY ?
6418                         "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid,
6419                         ref_key->offset);
6420         }
6421         read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
6422
6423         /* Find relative root_ref */
6424         key.objectid = ref_key->offset;
6425         key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
6426         key.offset = ref_key->objectid;
6427
6428         btrfs_init_path(&path);
6429         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
6430         if (ret) {
6431                 err |= ROOT_REF_MISSING;
6432                 error("%s[%llu %llu] couldn't find relative ref",
6433                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6434                       "ROOT_REF" : "ROOT_BACKREF",
6435                       ref_key->objectid, ref_key->offset);
6436                 goto out;
6437         }
6438
6439         backref = btrfs_item_ptr(path.nodes[0], path.slots[0],
6440                                  struct btrfs_root_ref);
6441         backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref);
6442         backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref);
6443         backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref);
6444
6445         if (backref_namelen <= BTRFS_NAME_LEN) {
6446                 len = backref_namelen;
6447         } else {
6448                 len = BTRFS_NAME_LEN;
6449                 warning("%s[%llu %llu] ref_name too long",
6450                         key.type == BTRFS_ROOT_REF_KEY ?
6451                         "ROOT_REF" : "ROOT_BACKREF",
6452                         key.objectid, key.offset);
6453         }
6454         read_extent_buffer(path.nodes[0], backref_name,
6455                            (unsigned long)(backref + 1), len);
6456
6457         if (ref_dirid != backref_dirid || ref_seq != backref_seq ||
6458             ref_namelen != backref_namelen ||
6459             strncmp(ref_name, backref_name, len)) {
6460                 err |= ROOT_REF_MISMATCH;
6461                 error("%s[%llu %llu] mismatch relative ref",
6462                       ref_key->type == BTRFS_ROOT_REF_KEY ?
6463                       "ROOT_REF" : "ROOT_BACKREF",
6464                       ref_key->objectid, ref_key->offset);
6465         }
6466 out:
6467         btrfs_release_path(&path);
6468         return err;
6469 }
6470
6471 /*
6472  * Check all fs/file tree in low_memory mode.
6473  *
6474  * 1. for fs tree root item, call check_fs_root_v2()
6475  * 2. for fs tree root ref/backref, call check_root_ref()
6476  *
6477  * Return 0 if no error occurred.
6478  */
6479 static int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
6480 {
6481         struct btrfs_root *tree_root = fs_info->tree_root;
6482         struct btrfs_root *cur_root = NULL;
6483         struct btrfs_path path;
6484         struct btrfs_key key;
6485         struct extent_buffer *node;
6486         unsigned int ext_ref;
6487         int slot;
6488         int ret;
6489         int err = 0;
6490
6491         ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
6492
6493         btrfs_init_path(&path);
6494         key.objectid = BTRFS_FS_TREE_OBJECTID;
6495         key.offset = 0;
6496         key.type = BTRFS_ROOT_ITEM_KEY;
6497
6498         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
6499         if (ret < 0) {
6500                 err = ret;
6501                 goto out;
6502         } else if (ret > 0) {
6503                 err = -ENOENT;
6504                 goto out;
6505         }
6506
6507         while (1) {
6508                 node = path.nodes[0];
6509                 slot = path.slots[0];
6510                 btrfs_item_key_to_cpu(node, &key, slot);
6511                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
6512                         goto out;
6513                 if (key.type == BTRFS_ROOT_ITEM_KEY &&
6514                     fs_root_objectid(key.objectid)) {
6515                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
6516                                 cur_root = btrfs_read_fs_root_no_cache(fs_info,
6517                                                                        &key);
6518                         } else {
6519                                 key.offset = (u64)-1;
6520                                 cur_root = btrfs_read_fs_root(fs_info, &key);
6521                         }
6522
6523                         if (IS_ERR(cur_root)) {
6524                                 error("Fail to read fs/subvol tree: %lld",
6525                                       key.objectid);
6526                                 err = -EIO;
6527                                 goto next;
6528                         }
6529
6530                         ret = check_fs_root_v2(cur_root, ext_ref);
6531                         err |= ret;
6532
6533                         if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
6534                                 btrfs_free_fs_root(cur_root);
6535                 } else if (key.type == BTRFS_ROOT_REF_KEY ||
6536                                 key.type == BTRFS_ROOT_BACKREF_KEY) {
6537                         ret = check_root_ref(tree_root, &key, node, slot);
6538                         err |= ret;
6539                 }
6540 next:
6541                 ret = btrfs_next_item(tree_root, &path);
6542                 if (ret > 0)
6543                         goto out;
6544                 if (ret < 0) {
6545                         err = ret;
6546                         goto out;
6547                 }
6548         }
6549
6550 out:
6551         btrfs_release_path(&path);
6552         return err;
6553 }
6554
6555 static int do_check_fs_roots(struct btrfs_fs_info *fs_info,
6556                           struct cache_tree *root_cache)
6557 {
6558         int ret;
6559
6560         if (!ctx.progress_enabled)
6561                 fprintf(stderr, "checking fs roots\n");
6562         if (check_mode == CHECK_MODE_LOWMEM)
6563                 ret = check_fs_roots_v2(fs_info);
6564         else
6565                 ret = check_fs_roots(fs_info, root_cache);
6566
6567         return ret;
6568 }
6569
6570 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
6571 {
6572         struct extent_backref *back, *tmp;
6573         struct tree_backref *tback;
6574         struct data_backref *dback;
6575         u64 found = 0;
6576         int err = 0;
6577
6578         rbtree_postorder_for_each_entry_safe(back, tmp,
6579                                              &rec->backref_tree, node) {
6580                 if (!back->found_extent_tree) {
6581                         err = 1;
6582                         if (!print_errs)
6583                                 goto out;
6584                         if (back->is_data) {
6585                                 dback = to_data_backref(back);
6586                                 fprintf(stderr, "Data backref %llu %s %llu"
6587                                         " owner %llu offset %llu num_refs %lu"
6588                                         " not found in extent tree\n",
6589                                         (unsigned long long)rec->start,
6590                                         back->full_backref ?
6591                                         "parent" : "root",
6592                                         back->full_backref ?
6593                                         (unsigned long long)dback->parent:
6594                                         (unsigned long long)dback->root,
6595                                         (unsigned long long)dback->owner,
6596                                         (unsigned long long)dback->offset,
6597                                         (unsigned long)dback->num_refs);
6598                         } else {
6599                                 tback = to_tree_backref(back);
6600                                 fprintf(stderr, "Tree backref %llu parent %llu"
6601                                         " root %llu not found in extent tree\n",
6602                                         (unsigned long long)rec->start,
6603                                         (unsigned long long)tback->parent,
6604                                         (unsigned long long)tback->root);
6605                         }
6606                 }
6607                 if (!back->is_data && !back->found_ref) {
6608                         err = 1;
6609                         if (!print_errs)
6610                                 goto out;
6611                         tback = to_tree_backref(back);
6612                         fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
6613                                 (unsigned long long)rec->start,
6614                                 back->full_backref ? "parent" : "root",
6615                                 back->full_backref ?
6616                                 (unsigned long long)tback->parent :
6617                                 (unsigned long long)tback->root, back);
6618                 }
6619                 if (back->is_data) {
6620                         dback = to_data_backref(back);
6621                         if (dback->found_ref != dback->num_refs) {
6622                                 err = 1;
6623                                 if (!print_errs)
6624                                         goto out;
6625                                 fprintf(stderr, "Incorrect local backref count"
6626                                         " on %llu %s %llu owner %llu"
6627                                         " offset %llu found %u wanted %u back %p\n",
6628                                         (unsigned long long)rec->start,
6629                                         back->full_backref ?
6630                                         "parent" : "root",
6631                                         back->full_backref ?
6632                                         (unsigned long long)dback->parent:
6633                                         (unsigned long long)dback->root,
6634                                         (unsigned long long)dback->owner,
6635                                         (unsigned long long)dback->offset,
6636                                         dback->found_ref, dback->num_refs, back);
6637                         }
6638                         if (dback->disk_bytenr != rec->start) {
6639                                 err = 1;
6640                                 if (!print_errs)
6641                                         goto out;
6642                                 fprintf(stderr, "Backref disk bytenr does not"
6643                                         " match extent record, bytenr=%llu, "
6644                                         "ref bytenr=%llu\n",
6645                                         (unsigned long long)rec->start,
6646                                         (unsigned long long)dback->disk_bytenr);
6647                         }
6648
6649                         if (dback->bytes != rec->nr) {
6650                                 err = 1;
6651                                 if (!print_errs)
6652                                         goto out;
6653                                 fprintf(stderr, "Backref bytes do not match "
6654                                         "extent backref, bytenr=%llu, ref "
6655                                         "bytes=%llu, backref bytes=%llu\n",
6656                                         (unsigned long long)rec->start,
6657                                         (unsigned long long)rec->nr,
6658                                         (unsigned long long)dback->bytes);
6659                         }
6660                 }
6661                 if (!back->is_data) {
6662                         found += 1;
6663                 } else {
6664                         dback = to_data_backref(back);
6665                         found += dback->found_ref;
6666                 }
6667         }
6668         if (found != rec->refs) {
6669                 err = 1;
6670                 if (!print_errs)
6671                         goto out;
6672                 fprintf(stderr, "Incorrect global backref count "
6673                         "on %llu found %llu wanted %llu\n",
6674                         (unsigned long long)rec->start,
6675                         (unsigned long long)found,
6676                         (unsigned long long)rec->refs);
6677         }
6678 out:
6679         return err;
6680 }
6681
/*
 * rb_free_nodes() callback: free the extent_backref that embeds @node.
 */
static void __free_one_backref(struct rb_node *node)
{
        free(rb_node_to_extent_backref(node));
}
6688
6689 static void free_all_extent_backrefs(struct extent_record *rec)
6690 {
6691         rb_free_nodes(&rec->backref_tree, __free_one_backref);
6692 }
6693
6694 static void free_extent_record_cache(struct cache_tree *extent_cache)
6695 {
6696         struct cache_extent *cache;
6697         struct extent_record *rec;
6698
6699         while (1) {
6700                 cache = first_cache_extent(extent_cache);
6701                 if (!cache)
6702                         break;
6703                 rec = container_of(cache, struct extent_record, cache);
6704                 remove_cache_extent(extent_cache, cache);
6705                 free_all_extent_backrefs(rec);
6706                 free(rec);
6707         }
6708 }
6709
6710 static int maybe_free_extent_rec(struct cache_tree *extent_cache,
6711                                  struct extent_record *rec)
6712 {
6713         if (rec->content_checked && rec->owner_ref_checked &&
6714             rec->extent_item_refs == rec->refs && rec->refs > 0 &&
6715             rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) &&
6716             !rec->bad_full_backref && !rec->crossing_stripes &&
6717             !rec->wrong_chunk_type) {
6718                 remove_cache_extent(extent_cache, &rec->cache);
6719                 free_all_extent_backrefs(rec);
6720                 list_del_init(&rec->list);
6721                 free(rec);
6722         }
6723         return 0;
6724 }
6725
6726 static int check_owner_ref(struct btrfs_root *root,
6727                             struct extent_record *rec,
6728                             struct extent_buffer *buf)
6729 {
6730         struct extent_backref *node, *tmp;
6731         struct tree_backref *back;
6732         struct btrfs_root *ref_root;
6733         struct btrfs_key key;
6734         struct btrfs_path path;
6735         struct extent_buffer *parent;
6736         int level;
6737         int found = 0;
6738         int ret;
6739
6740         rbtree_postorder_for_each_entry_safe(node, tmp,
6741                                              &rec->backref_tree, node) {
6742                 if (node->is_data)
6743                         continue;
6744                 if (!node->found_ref)
6745                         continue;
6746                 if (node->full_backref)
6747                         continue;
6748                 back = to_tree_backref(node);
6749                 if (btrfs_header_owner(buf) == back->root)
6750                         return 0;
6751         }
6752         BUG_ON(rec->is_root);
6753
6754         /* try to find the block by search corresponding fs tree */
6755         key.objectid = btrfs_header_owner(buf);
6756         key.type = BTRFS_ROOT_ITEM_KEY;
6757         key.offset = (u64)-1;
6758
6759         ref_root = btrfs_read_fs_root(root->fs_info, &key);
6760         if (IS_ERR(ref_root))
6761                 return 1;
6762
6763         level = btrfs_header_level(buf);
6764         if (level == 0)
6765                 btrfs_item_key_to_cpu(buf, &key, 0);
6766         else
6767                 btrfs_node_key_to_cpu(buf, &key, 0);
6768
6769         btrfs_init_path(&path);
6770         path.lowest_level = level + 1;
6771         ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0);
6772         if (ret < 0)
6773                 return 0;
6774
6775         parent = path.nodes[level + 1];
6776         if (parent && buf->start == btrfs_node_blockptr(parent,
6777                                                         path.slots[level + 1]))
6778                 found = 1;
6779
6780         btrfs_release_path(&path);
6781         return found ? 0 : 1;
6782 }
6783
6784 static int is_extent_tree_record(struct extent_record *rec)
6785 {
6786         struct extent_backref *node, *tmp;
6787         struct tree_backref *back;
6788         int is_extent = 0;
6789
6790         rbtree_postorder_for_each_entry_safe(node, tmp,
6791                                              &rec->backref_tree, node) {
6792                 if (node->is_data)
6793                         return 0;
6794                 back = to_tree_backref(node);
6795                 if (node->full_backref)
6796                         return 0;
6797                 if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
6798                         is_extent = 1;
6799         }
6800         return is_extent;
6801 }
6802
6803
6804 static int record_bad_block_io(struct btrfs_fs_info *info,
6805                                struct cache_tree *extent_cache,
6806                                u64 start, u64 len)
6807 {
6808         struct extent_record *rec;
6809         struct cache_extent *cache;
6810         struct btrfs_key key;
6811
6812         cache = lookup_cache_extent(extent_cache, start, len);
6813         if (!cache)
6814                 return 0;
6815
6816         rec = container_of(cache, struct extent_record, cache);
6817         if (!is_extent_tree_record(rec))
6818                 return 0;
6819
6820         btrfs_disk_key_to_cpu(&key, &rec->parent_key);
6821         return btrfs_add_corrupt_extent_record(info, &key, start, len, 0);
6822 }
6823
6824 static int swap_values(struct btrfs_root *root, struct btrfs_path *path,
6825                        struct extent_buffer *buf, int slot)
6826 {
6827         if (btrfs_header_level(buf)) {
6828                 struct btrfs_key_ptr ptr1, ptr2;
6829
6830                 read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot),
6831                                    sizeof(struct btrfs_key_ptr));
6832                 read_extent_buffer(buf, &ptr2,
6833                                    btrfs_node_key_ptr_offset(slot + 1),
6834                                    sizeof(struct btrfs_key_ptr));
6835                 write_extent_buffer(buf, &ptr1,
6836                                     btrfs_node_key_ptr_offset(slot + 1),
6837                                     sizeof(struct btrfs_key_ptr));
6838                 write_extent_buffer(buf, &ptr2,
6839                                     btrfs_node_key_ptr_offset(slot),
6840                                     sizeof(struct btrfs_key_ptr));
6841                 if (slot == 0) {
6842                         struct btrfs_disk_key key;
6843                         btrfs_node_key(buf, &key, 0);
6844                         btrfs_fixup_low_keys(root, path, &key,
6845                                              btrfs_header_level(buf) + 1);
6846                 }
6847         } else {
6848                 struct btrfs_item *item1, *item2;
6849                 struct btrfs_key k1, k2;
6850                 char *item1_data, *item2_data;
6851                 u32 item1_offset, item2_offset, item1_size, item2_size;
6852
6853                 item1 = btrfs_item_nr(slot);
6854                 item2 = btrfs_item_nr(slot + 1);
6855                 btrfs_item_key_to_cpu(buf, &k1, slot);
6856                 btrfs_item_key_to_cpu(buf, &k2, slot + 1);
6857                 item1_offset = btrfs_item_offset(buf, item1);
6858                 item2_offset = btrfs_item_offset(buf, item2);
6859                 item1_size = btrfs_item_size(buf, item1);
6860                 item2_size = btrfs_item_size(buf, item2);
6861
6862                 item1_data = malloc(item1_size);
6863                 if (!item1_data)
6864                         return -ENOMEM;
6865                 item2_data = malloc(item2_size);
6866                 if (!item2_data) {
6867                         free(item1_data);
6868                         return -ENOMEM;
6869                 }
6870
6871                 read_extent_buffer(buf, item1_data, item1_offset, item1_size);
6872                 read_extent_buffer(buf, item2_data, item2_offset, item2_size);
6873
6874                 write_extent_buffer(buf, item1_data, item2_offset, item2_size);
6875                 write_extent_buffer(buf, item2_data, item1_offset, item1_size);
6876                 free(item1_data);
6877                 free(item2_data);
6878
6879                 btrfs_set_item_offset(buf, item1, item2_offset);
6880                 btrfs_set_item_offset(buf, item2, item1_offset);
6881                 btrfs_set_item_size(buf, item1, item2_size);
6882                 btrfs_set_item_size(buf, item2, item1_size);
6883
6884                 path->slots[0] = slot;
6885                 btrfs_set_item_key_unsafe(root, path, &k2);
6886                 path->slots[0] = slot + 1;
6887                 btrfs_set_item_key_unsafe(root, path, &k1);
6888         }
6889         return 0;
6890 }
6891
6892 static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path)
6893 {
6894         struct extent_buffer *buf;
6895         struct btrfs_key k1, k2;
6896         int i;
6897         int level = path->lowest_level;
6898         int ret = -EIO;
6899
6900         buf = path->nodes[level];
6901         for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) {
6902                 if (level) {
6903                         btrfs_node_key_to_cpu(buf, &k1, i);
6904                         btrfs_node_key_to_cpu(buf, &k2, i + 1);
6905                 } else {
6906                         btrfs_item_key_to_cpu(buf, &k1, i);
6907                         btrfs_item_key_to_cpu(buf, &k2, i + 1);
6908                 }
6909                 if (btrfs_comp_cpu_keys(&k1, &k2) < 0)
6910                         continue;
6911                 ret = swap_values(root, path, buf, i);
6912                 if (ret)
6913                         break;
6914                 btrfs_mark_buffer_dirty(buf);
6915                 i = 0;
6916         }
6917         return ret;
6918 }
6919
6920 static int delete_bogus_item(struct btrfs_root *root,
6921                              struct btrfs_path *path,
6922                              struct extent_buffer *buf, int slot)
6923 {
6924         struct btrfs_key key;
6925         int nritems = btrfs_header_nritems(buf);
6926
6927         btrfs_item_key_to_cpu(buf, &key, slot);
6928
6929         /* These are all the keys we can deal with missing. */
6930         if (key.type != BTRFS_DIR_INDEX_KEY &&
6931             key.type != BTRFS_EXTENT_ITEM_KEY &&
6932             key.type != BTRFS_METADATA_ITEM_KEY &&
6933             key.type != BTRFS_TREE_BLOCK_REF_KEY &&
6934             key.type != BTRFS_EXTENT_DATA_REF_KEY)
6935                 return -1;
6936
6937         printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n",
6938                (unsigned long long)key.objectid, key.type,
6939                (unsigned long long)key.offset, slot, buf->start);
6940         memmove_extent_buffer(buf, btrfs_item_nr_offset(slot),
6941                               btrfs_item_nr_offset(slot + 1),
6942                               sizeof(struct btrfs_item) *
6943                               (nritems - slot - 1));
6944         btrfs_set_header_nritems(buf, nritems - 1);
6945         if (slot == 0) {
6946                 struct btrfs_disk_key disk_key;
6947
6948                 btrfs_item_key(buf, &disk_key, 0);
6949                 btrfs_fixup_low_keys(root, path, &disk_key, 1);
6950         }
6951         btrfs_mark_buffer_dirty(buf);
6952         return 0;
6953 }
6954
6955 static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path)
6956 {
6957         struct extent_buffer *buf;
6958         int i;
6959         int ret = 0;
6960
6961         /* We should only get this for leaves */
6962         BUG_ON(path->lowest_level);
6963         buf = path->nodes[0];
6964 again:
6965         for (i = 0; i < btrfs_header_nritems(buf); i++) {
6966                 unsigned int shift = 0, offset;
6967
6968                 if (i == 0 && btrfs_item_end_nr(buf, i) !=
6969                     BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6970                         if (btrfs_item_end_nr(buf, i) >
6971                             BTRFS_LEAF_DATA_SIZE(root->fs_info)) {
6972                                 ret = delete_bogus_item(root, path, buf, i);
6973                                 if (!ret)
6974                                         goto again;
6975                                 fprintf(stderr, "item is off the end of the "
6976                                         "leaf, can't fix\n");
6977                                 ret = -EIO;
6978                                 break;
6979                         }
6980                         shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) -
6981                                 btrfs_item_end_nr(buf, i);
6982                 } else if (i > 0 && btrfs_item_end_nr(buf, i) !=
6983                            btrfs_item_offset_nr(buf, i - 1)) {
6984                         if (btrfs_item_end_nr(buf, i) >
6985                             btrfs_item_offset_nr(buf, i - 1)) {
6986                                 ret = delete_bogus_item(root, path, buf, i);
6987                                 if (!ret)
6988                                         goto again;
6989                                 fprintf(stderr, "items overlap, can't fix\n");
6990                                 ret = -EIO;
6991                                 break;
6992                         }
6993                         shift = btrfs_item_offset_nr(buf, i - 1) -
6994                                 btrfs_item_end_nr(buf, i);
6995                 }
6996                 if (!shift)
6997                         continue;
6998
6999                 printf("Shifting item nr %d by %u bytes in block %llu\n",
7000                        i, shift, (unsigned long long)buf->start);
7001                 offset = btrfs_item_offset_nr(buf, i);
7002                 memmove_extent_buffer(buf,
7003                                       btrfs_leaf_data(buf) + offset + shift,
7004                                       btrfs_leaf_data(buf) + offset,
7005                                       btrfs_item_size_nr(buf, i));
7006                 btrfs_set_item_offset(buf, btrfs_item_nr(i),
7007                                       offset + shift);
7008                 btrfs_mark_buffer_dirty(buf);
7009         }
7010
7011         /*
7012          * We may have moved things, in which case we want to exit so we don't
7013          * write those changes out.  Once we have proper abort functionality in
7014          * progs this can be changed to something nicer.
7015          */
7016         BUG_ON(ret);
7017         return ret;
7018 }
7019
7020 /*
7021  * Attempt to fix basic block failures.  If we can't fix it for whatever reason
7022  * then just return -EIO.
7023  */
7024 static int try_to_fix_bad_block(struct btrfs_root *root,
7025                                 struct extent_buffer *buf,
7026                                 enum btrfs_tree_block_status status)
7027 {
7028         struct btrfs_trans_handle *trans;
7029         struct ulist *roots;
7030         struct ulist_node *node;
7031         struct btrfs_root *search_root;
7032         struct btrfs_path path;
7033         struct ulist_iterator iter;
7034         struct btrfs_key root_key, key;
7035         int ret;
7036
7037         if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER &&
7038             status != BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7039                 return -EIO;
7040
7041         ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots);
7042         if (ret)
7043                 return -EIO;
7044
7045         btrfs_init_path(&path);
7046         ULIST_ITER_INIT(&iter);
7047         while ((node = ulist_next(roots, &iter))) {
7048                 root_key.objectid = node->val;
7049                 root_key.type = BTRFS_ROOT_ITEM_KEY;
7050                 root_key.offset = (u64)-1;
7051
7052                 search_root = btrfs_read_fs_root(root->fs_info, &root_key);
7053                 if (IS_ERR(root)) {
7054                         ret = -EIO;
7055                         break;
7056                 }
7057
7058
7059                 trans = btrfs_start_transaction(search_root, 0);
7060                 if (IS_ERR(trans)) {
7061                         ret = PTR_ERR(trans);
7062                         break;
7063                 }
7064
7065                 path.lowest_level = btrfs_header_level(buf);
7066                 path.skip_check_block = 1;
7067                 if (path.lowest_level)
7068                         btrfs_node_key_to_cpu(buf, &key, 0);
7069                 else
7070                         btrfs_item_key_to_cpu(buf, &key, 0);
7071                 ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1);
7072                 if (ret) {
7073                         ret = -EIO;
7074                         btrfs_commit_transaction(trans, search_root);
7075                         break;
7076                 }
7077                 if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER)
7078                         ret = fix_key_order(search_root, &path);
7079                 else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS)
7080                         ret = fix_item_offset(search_root, &path);
7081                 if (ret) {
7082                         btrfs_commit_transaction(trans, search_root);
7083                         break;
7084                 }
7085                 btrfs_release_path(&path);
7086                 btrfs_commit_transaction(trans, search_root);
7087         }
7088         ulist_free(roots);
7089         btrfs_release_path(&path);
7090         return ret;
7091 }
7092
7093 static int check_block(struct btrfs_root *root,
7094                        struct cache_tree *extent_cache,
7095                        struct extent_buffer *buf, u64 flags)
7096 {
7097         struct extent_record *rec;
7098         struct cache_extent *cache;
7099         struct btrfs_key key;
7100         enum btrfs_tree_block_status status;
7101         int ret = 0;
7102         int level;
7103
7104         cache = lookup_cache_extent(extent_cache, buf->start, buf->len);
7105         if (!cache)
7106                 return 1;
7107         rec = container_of(cache, struct extent_record, cache);
7108         rec->generation = btrfs_header_generation(buf);
7109
7110         level = btrfs_header_level(buf);
7111         if (btrfs_header_nritems(buf) > 0) {
7112
7113                 if (level == 0)
7114                         btrfs_item_key_to_cpu(buf, &key, 0);
7115                 else
7116                         btrfs_node_key_to_cpu(buf, &key, 0);
7117
7118                 rec->info_objectid = key.objectid;
7119         }
7120         rec->info_level = level;
7121
7122         if (btrfs_is_leaf(buf))
7123                 status = btrfs_check_leaf(root, &rec->parent_key, buf);
7124         else
7125                 status = btrfs_check_node(root, &rec->parent_key, buf);
7126
7127         if (status != BTRFS_TREE_BLOCK_CLEAN) {
7128                 if (repair)
7129                         status = try_to_fix_bad_block(root, buf, status);
7130                 if (status != BTRFS_TREE_BLOCK_CLEAN) {
7131                         ret = -EIO;
7132                         fprintf(stderr, "bad block %llu\n",
7133                                 (unsigned long long)buf->start);
7134                 } else {
7135                         /*
7136                          * Signal to callers we need to start the scan over
7137                          * again since we'll have cowed blocks.
7138                          */
7139                         ret = -EAGAIN;
7140                 }
7141         } else {
7142                 rec->content_checked = 1;
7143                 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7144                         rec->owner_ref_checked = 1;
7145                 else {
7146                         ret = check_owner_ref(root, rec, buf);
7147                         if (!ret)
7148                                 rec->owner_ref_checked = 1;
7149                 }
7150         }
7151         if (!ret)
7152                 maybe_free_extent_rec(extent_cache, rec);
7153         return ret;
7154 }
7155
#if 0
/*
 * Compiled out.  Walks the rec->backrefs list looking for a tree backref:
 * with a non-zero @parent it matches full backrefs by parent, otherwise it
 * matches non-full backrefs by @root.  Returns NULL if nothing matches.
 */
static struct tree_backref *find_tree_backref(struct extent_record *rec,
                                                u64 parent, u64 root)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct tree_backref *back;

                if (node->is_data)
                        continue;
                back = to_tree_backref(node);
                if (parent > 0) {
                        if (node->full_backref && back->parent == parent)
                                return back;
                } else if (!node->full_backref && back->root == root) {
                        return back;
                }
        }
        return NULL;
}
#endif
7185
7186 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
7187                                                 u64 parent, u64 root)
7188 {
7189         struct tree_backref *ref = malloc(sizeof(*ref));
7190
7191         if (!ref)
7192                 return NULL;
7193         memset(&ref->node, 0, sizeof(ref->node));
7194         if (parent > 0) {
7195                 ref->parent = parent;
7196                 ref->node.full_backref = 1;
7197         } else {
7198                 ref->root = root;
7199                 ref->node.full_backref = 0;
7200         }
7201
7202         return ref;
7203 }
7204
#if 0
/*
 * Compiled out.  Walks the rec->backrefs list looking for a data backref:
 * with a non-zero @parent it matches full backrefs by parent, otherwise it
 * matches non-full backrefs by (@root, @owner, @offset).  When both the
 * caller and the candidate have found_ref set, the candidate must also agree
 * on @bytes and @disk_bytenr.  Returns NULL if nothing matches.
 */
static struct data_backref *find_data_backref(struct extent_record *rec,
                                                u64 parent, u64 root,
                                                u64 owner, u64 offset,
                                                int found_ref,
                                                u64 disk_bytenr, u64 bytes)
{
        struct list_head *pos;

        for (pos = rec->backrefs.next; pos != &rec->backrefs;
             pos = pos->next) {
                struct extent_backref *node = to_extent_backref(pos);
                struct data_backref *back;

                if (!node->is_data)
                        continue;
                back = to_data_backref(node);

                if (parent > 0) {
                        if (node->full_backref && back->parent == parent)
                                return back;
                        continue;
                }

                if (node->full_backref)
                        continue;
                if (back->root != root || back->owner != owner ||
                    back->offset != offset)
                        continue;
                if (found_ref && node->found_ref &&
                    (back->bytes != bytes ||
                     back->disk_bytenr != disk_bytenr))
                        continue;
                return back;
        }
        return NULL;
}
#endif
7243
7244 static struct data_backref *alloc_data_backref(struct extent_record *rec,
7245                                                 u64 parent, u64 root,
7246                                                 u64 owner, u64 offset,
7247                                                 u64 max_size)
7248 {
7249         struct data_backref *ref = malloc(sizeof(*ref));
7250
7251         if (!ref)
7252                 return NULL;
7253         memset(&ref->node, 0, sizeof(ref->node));
7254         ref->node.is_data = 1;
7255
7256         if (parent > 0) {
7257                 ref->parent = parent;
7258                 ref->owner = 0;
7259                 ref->offset = 0;
7260                 ref->node.full_backref = 1;
7261         } else {
7262                 ref->root = root;
7263                 ref->owner = owner;
7264                 ref->offset = offset;
7265                 ref->node.full_backref = 0;
7266         }
7267         ref->bytes = max_size;
7268         ref->found_ref = 0;
7269         ref->num_refs = 0;
7270         if (max_size > rec->max_size)
7271                 rec->max_size = max_size;
7272         return ref;
7273 }
7274
7275 /* Check if the type of extent matches with its chunk */
7276 static void check_extent_type(struct extent_record *rec)
7277 {
7278         struct btrfs_block_group_cache *bg_cache;
7279
7280         bg_cache = btrfs_lookup_first_block_group(global_info, rec->start);
7281         if (!bg_cache)
7282                 return;
7283
7284         /* data extent, check chunk directly*/
7285         if (!rec->metadata) {
7286                 if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA))
7287                         rec->wrong_chunk_type = 1;
7288                 return;
7289         }
7290
7291         /* metadata extent, check the obvious case first */
7292         if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM |
7293                                  BTRFS_BLOCK_GROUP_METADATA))) {
7294                 rec->wrong_chunk_type = 1;
7295                 return;
7296         }
7297
7298         /*
7299          * Check SYSTEM extent, as it's also marked as metadata, we can only
7300          * make sure it's a SYSTEM extent by its backref
7301          */
7302         if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
7303                 struct extent_backref *node;
7304                 struct tree_backref *tback;
7305                 u64 bg_type;
7306
7307                 node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
7308                 if (node->is_data) {
7309                         /* tree block shouldn't have data backref */
7310                         rec->wrong_chunk_type = 1;
7311                         return;
7312                 }
7313                 tback = container_of(node, struct tree_backref, node);
7314
7315                 if (tback->root == BTRFS_CHUNK_TREE_OBJECTID)
7316                         bg_type = BTRFS_BLOCK_GROUP_SYSTEM;
7317                 else
7318                         bg_type = BTRFS_BLOCK_GROUP_METADATA;
7319                 if (!(bg_cache->flags & bg_type))
7320                         rec->wrong_chunk_type = 1;
7321         }
7322 }
7323
7324 /*
7325  * Allocate a new extent record, fill default values from @tmpl and insert int
7326  * @extent_cache. Caller is supposed to make sure the [start,nr) is not in
7327  * the cache, otherwise it fails.
7328  */
7329 static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
7330                 struct extent_record *tmpl)
7331 {
7332         struct extent_record *rec;
7333         int ret = 0;
7334
7335         BUG_ON(tmpl->max_size == 0);
7336         rec = malloc(sizeof(*rec));
7337         if (!rec)
7338                 return -ENOMEM;
7339         rec->start = tmpl->start;
7340         rec->max_size = tmpl->max_size;
7341         rec->nr = max(tmpl->nr, tmpl->max_size);
7342         rec->found_rec = tmpl->found_rec;
7343         rec->content_checked = tmpl->content_checked;
7344         rec->owner_ref_checked = tmpl->owner_ref_checked;
7345         rec->num_duplicates = 0;
7346         rec->metadata = tmpl->metadata;
7347         rec->flag_block_full_backref = FLAG_UNSET;
7348         rec->bad_full_backref = 0;
7349         rec->crossing_stripes = 0;
7350         rec->wrong_chunk_type = 0;
7351         rec->is_root = tmpl->is_root;
7352         rec->refs = tmpl->refs;
7353         rec->extent_item_refs = tmpl->extent_item_refs;
7354         rec->parent_generation = tmpl->parent_generation;
7355         INIT_LIST_HEAD(&rec->backrefs);
7356         INIT_LIST_HEAD(&rec->dups);
7357         INIT_LIST_HEAD(&rec->list);
7358         rec->backref_tree = RB_ROOT;
7359         memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
7360         rec->cache.start = tmpl->start;
7361         rec->cache.size = tmpl->nr;
7362         ret = insert_cache_extent(extent_cache, &rec->cache);
7363         if (ret) {
7364                 free(rec);
7365                 return ret;
7366         }
7367         bytes_used += rec->nr;
7368
7369         if (tmpl->metadata)
7370                 rec->crossing_stripes = check_crossing_stripes(global_info,
7371                                 rec->start, global_info->nodesize);
7372         check_extent_type(rec);
7373         return ret;
7374 }
7375
7376 /*
7377  * Lookup and modify an extent, some values of @tmpl are interpreted verbatim,
7378  * some are hints:
7379  * - refs              - if found, increase refs
7380  * - is_root           - if found, set
7381  * - content_checked   - if found, set
7382  * - owner_ref_checked - if found, set
7383  *
7384  * If not found, create a new one, initialize and insert.
7385  */
7386 static int add_extent_rec(struct cache_tree *extent_cache,
7387                 struct extent_record *tmpl)
7388 {
7389         struct extent_record *rec;
7390         struct cache_extent *cache;
7391         int ret = 0;
7392         int dup = 0;
7393
7394         cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr);
7395         if (cache) {
7396                 rec = container_of(cache, struct extent_record, cache);
7397                 if (tmpl->refs)
7398                         rec->refs++;
7399                 if (rec->nr == 1)
7400                         rec->nr = max(tmpl->nr, tmpl->max_size);
7401
7402                 /*
7403                  * We need to make sure to reset nr to whatever the extent
7404                  * record says was the real size, this way we can compare it to
7405                  * the backrefs.
7406                  */
7407                 if (tmpl->found_rec) {
7408                         if (tmpl->start != rec->start || rec->found_rec) {
7409                                 struct extent_record *tmp;
7410
7411                                 dup = 1;
7412                                 if (list_empty(&rec->list))
7413                                         list_add_tail(&rec->list,
7414                                                       &duplicate_extents);
7415
7416                                 /*
7417                                  * We have to do this song and dance in case we
7418                                  * find an extent record that falls inside of
7419                                  * our current extent record but does not have
7420                                  * the same objectid.
7421                                  */
7422                                 tmp = malloc(sizeof(*tmp));
7423                                 if (!tmp)
7424                                         return -ENOMEM;
7425                                 tmp->start = tmpl->start;
7426                                 tmp->max_size = tmpl->max_size;
7427                                 tmp->nr = tmpl->nr;
7428                                 tmp->found_rec = 1;
7429                                 tmp->metadata = tmpl->metadata;
7430                                 tmp->extent_item_refs = tmpl->extent_item_refs;
7431                                 INIT_LIST_HEAD(&tmp->list);
7432                                 list_add_tail(&tmp->list, &rec->dups);
7433                                 rec->num_duplicates++;
7434                         } else {
7435                                 rec->nr = tmpl->nr;
7436                                 rec->found_rec = 1;
7437                         }
7438                 }
7439
7440                 if (tmpl->extent_item_refs && !dup) {
7441                         if (rec->extent_item_refs) {
7442                                 fprintf(stderr, "block %llu rec "
7443                                         "extent_item_refs %llu, passed %llu\n",
7444                                         (unsigned long long)tmpl->start,
7445                                         (unsigned long long)
7446                                                         rec->extent_item_refs,
7447                                         (unsigned long long)tmpl->extent_item_refs);
7448                         }
7449                         rec->extent_item_refs = tmpl->extent_item_refs;
7450                 }
7451                 if (tmpl->is_root)
7452                         rec->is_root = 1;
7453                 if (tmpl->content_checked)
7454                         rec->content_checked = 1;
7455                 if (tmpl->owner_ref_checked)
7456                         rec->owner_ref_checked = 1;
7457                 memcpy(&rec->parent_key, &tmpl->parent_key,
7458                                 sizeof(tmpl->parent_key));
7459                 if (tmpl->parent_generation)
7460                         rec->parent_generation = tmpl->parent_generation;
7461                 if (rec->max_size < tmpl->max_size)
7462                         rec->max_size = tmpl->max_size;
7463
7464                 /*
7465                  * A metadata extent can't cross stripe_len boundary, otherwise
7466                  * kernel scrub won't be able to handle it.
7467                  * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check
7468                  * it.
7469                  */
7470                 if (tmpl->metadata)
7471                         rec->crossing_stripes = check_crossing_stripes(
7472                                         global_info, rec->start,
7473                                         global_info->nodesize);
7474                 check_extent_type(rec);
7475                 maybe_free_extent_rec(extent_cache, rec);
7476                 return ret;
7477         }
7478
7479         ret = add_extent_rec_nolookup(extent_cache, tmpl);
7480
7481         return ret;
7482 }
7483
7484 static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
7485                             u64 parent, u64 root, int found_ref)
7486 {
7487         struct extent_record *rec;
7488         struct tree_backref *back;
7489         struct cache_extent *cache;
7490         int ret;
7491         bool insert = false;
7492
7493         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7494         if (!cache) {
7495                 struct extent_record tmpl;
7496
7497                 memset(&tmpl, 0, sizeof(tmpl));
7498                 tmpl.start = bytenr;
7499                 tmpl.nr = 1;
7500                 tmpl.metadata = 1;
7501                 tmpl.max_size = 1;
7502
7503                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7504                 if (ret)
7505                         return ret;
7506
7507                 /* really a bug in cache_extent implement now */
7508                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7509                 if (!cache)
7510                         return -ENOENT;
7511         }
7512
7513         rec = container_of(cache, struct extent_record, cache);
7514         if (rec->start != bytenr) {
7515                 /*
7516                  * Several cause, from unaligned bytenr to over lapping extents
7517                  */
7518                 return -EEXIST;
7519         }
7520
7521         back = find_tree_backref(rec, parent, root);
7522         if (!back) {
7523                 back = alloc_tree_backref(rec, parent, root);
7524                 if (!back)
7525                         return -ENOMEM;
7526                 insert = true;
7527         }
7528
7529         if (found_ref) {
7530                 if (back->node.found_ref) {
7531                         fprintf(stderr, "Extent back ref already exists "
7532                                 "for %llu parent %llu root %llu \n",
7533                                 (unsigned long long)bytenr,
7534                                 (unsigned long long)parent,
7535                                 (unsigned long long)root);
7536                 }
7537                 back->node.found_ref = 1;
7538         } else {
7539                 if (back->node.found_extent_tree) {
7540                         fprintf(stderr, "Extent back ref already exists "
7541                                 "for %llu parent %llu root %llu \n",
7542                                 (unsigned long long)bytenr,
7543                                 (unsigned long long)parent,
7544                                 (unsigned long long)root);
7545                 }
7546                 back->node.found_extent_tree = 1;
7547         }
7548         if (insert)
7549                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7550                         compare_extent_backref));
7551         check_extent_type(rec);
7552         maybe_free_extent_rec(extent_cache, rec);
7553         return 0;
7554 }
7555
7556 static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
7557                             u64 parent, u64 root, u64 owner, u64 offset,
7558                             u32 num_refs, int found_ref, u64 max_size)
7559 {
7560         struct extent_record *rec;
7561         struct data_backref *back;
7562         struct cache_extent *cache;
7563         int ret;
7564         bool insert = false;
7565
7566         cache = lookup_cache_extent(extent_cache, bytenr, 1);
7567         if (!cache) {
7568                 struct extent_record tmpl;
7569
7570                 memset(&tmpl, 0, sizeof(tmpl));
7571                 tmpl.start = bytenr;
7572                 tmpl.nr = 1;
7573                 tmpl.max_size = max_size;
7574
7575                 ret = add_extent_rec_nolookup(extent_cache, &tmpl);
7576                 if (ret)
7577                         return ret;
7578
7579                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
7580                 if (!cache)
7581                         abort();
7582         }
7583
7584         rec = container_of(cache, struct extent_record, cache);
7585         if (rec->max_size < max_size)
7586                 rec->max_size = max_size;
7587
7588         /*
7589          * If found_ref is set then max_size is the real size and must match the
7590          * existing refs.  So if we have already found a ref then we need to
7591          * make sure that this ref matches the existing one, otherwise we need
7592          * to add a new backref so we can notice that the backrefs don't match
7593          * and we need to figure out who is telling the truth.  This is to
7594          * account for that awful fsync bug I introduced where we'd end up with
7595          * a btrfs_file_extent_item that would have its length include multiple
7596          * prealloc extents or point inside of a prealloc extent.
7597          */
7598         back = find_data_backref(rec, parent, root, owner, offset, found_ref,
7599                                  bytenr, max_size);
7600         if (!back) {
7601                 back = alloc_data_backref(rec, parent, root, owner, offset,
7602                                           max_size);
7603                 BUG_ON(!back);
7604                 insert = true;
7605         }
7606
7607         if (found_ref) {
7608                 BUG_ON(num_refs != 1);
7609                 if (back->node.found_ref)
7610                         BUG_ON(back->bytes != max_size);
7611                 back->node.found_ref = 1;
7612                 back->found_ref += 1;
7613                 if (back->bytes != max_size || back->disk_bytenr != bytenr) {
7614                         back->bytes = max_size;
7615                         back->disk_bytenr = bytenr;
7616
7617                         /* Need to reinsert if not already in the tree */
7618                         if (!insert) {
7619                                 rb_erase(&back->node.node, &rec->backref_tree);
7620                                 insert = true;
7621                         }
7622                 }
7623                 rec->refs += 1;
7624                 rec->content_checked = 1;
7625                 rec->owner_ref_checked = 1;
7626         } else {
7627                 if (back->node.found_extent_tree) {
7628                         fprintf(stderr, "Extent back ref already exists "
7629                                 "for %llu parent %llu root %llu "
7630                                 "owner %llu offset %llu num_refs %lu\n",
7631                                 (unsigned long long)bytenr,
7632                                 (unsigned long long)parent,
7633                                 (unsigned long long)root,
7634                                 (unsigned long long)owner,
7635                                 (unsigned long long)offset,
7636                                 (unsigned long)num_refs);
7637                 }
7638                 back->num_refs = num_refs;
7639                 back->node.found_extent_tree = 1;
7640         }
7641         if (insert)
7642                 WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
7643                         compare_extent_backref));
7644
7645         maybe_free_extent_rec(extent_cache, rec);
7646         return 0;
7647 }
7648
7649 static int add_pending(struct cache_tree *pending,
7650                        struct cache_tree *seen, u64 bytenr, u32 size)
7651 {
7652         int ret;
7653         ret = add_cache_extent(seen, bytenr, size);
7654         if (ret)
7655                 return ret;
7656         add_cache_extent(pending, bytenr, size);
7657         return 0;
7658 }
7659
7660 static int pick_next_pending(struct cache_tree *pending,
7661                         struct cache_tree *reada,
7662                         struct cache_tree *nodes,
7663                         u64 last, struct block_info *bits, int bits_nr,
7664                         int *reada_bits)
7665 {
7666         unsigned long node_start = last;
7667         struct cache_extent *cache;
7668         int ret;
7669
7670         cache = search_cache_extent(reada, 0);
7671         if (cache) {
7672                 bits[0].start = cache->start;
7673                 bits[0].size = cache->size;
7674                 *reada_bits = 1;
7675                 return 1;
7676         }
7677         *reada_bits = 0;
7678         if (node_start > 32768)
7679                 node_start -= 32768;
7680
7681         cache = search_cache_extent(nodes, node_start);
7682         if (!cache)
7683                 cache = search_cache_extent(nodes, 0);
7684
7685         if (!cache) {
7686                  cache = search_cache_extent(pending, 0);
7687                  if (!cache)
7688                          return 0;
7689                  ret = 0;
7690                  do {
7691                          bits[ret].start = cache->start;
7692                          bits[ret].size = cache->size;
7693                          cache = next_cache_extent(cache);
7694                          ret++;
7695                  } while (cache && ret < bits_nr);
7696                  return ret;
7697         }
7698
7699         ret = 0;
7700         do {
7701                 bits[ret].start = cache->start;
7702                 bits[ret].size = cache->size;
7703                 cache = next_cache_extent(cache);
7704                 ret++;
7705         } while (cache && ret < bits_nr);
7706
7707         if (bits_nr - ret > 8) {
7708                 u64 lookup = bits[0].start + bits[0].size;
7709                 struct cache_extent *next;
7710                 next = search_cache_extent(pending, lookup);
7711                 while(next) {
7712                         if (next->start - lookup > 32768)
7713                                 break;
7714                         bits[ret].start = next->start;
7715                         bits[ret].size = next->size;
7716                         lookup = next->start + next->size;
7717                         ret++;
7718                         if (ret == bits_nr)
7719                                 break;
7720                         next = next_cache_extent(next);
7721                         if (!next)
7722                                 break;
7723                 }
7724         }
7725         return ret;
7726 }
7727
7728 static void free_chunk_record(struct cache_extent *cache)
7729 {
7730         struct chunk_record *rec;
7731
7732         rec = container_of(cache, struct chunk_record, cache);
7733         list_del_init(&rec->list);
7734         list_del_init(&rec->dextents);
7735         free(rec);
7736 }
7737
7738 void free_chunk_cache_tree(struct cache_tree *chunk_cache)
7739 {
7740         cache_tree_free_extents(chunk_cache, free_chunk_record);
7741 }
7742
7743 static void free_device_record(struct rb_node *node)
7744 {
7745         struct device_record *rec;
7746
7747         rec = container_of(node, struct device_record, node);
7748         free(rec);
7749 }
7750
7751 FREE_RB_BASED_TREE(device_cache, free_device_record);
7752
7753 int insert_block_group_record(struct block_group_tree *tree,
7754                               struct block_group_record *bg_rec)
7755 {
7756         int ret;
7757
7758         ret = insert_cache_extent(&tree->tree, &bg_rec->cache);
7759         if (ret)
7760                 return ret;
7761
7762         list_add_tail(&bg_rec->list, &tree->block_groups);
7763         return 0;
7764 }
7765
7766 static void free_block_group_record(struct cache_extent *cache)
7767 {
7768         struct block_group_record *rec;
7769
7770         rec = container_of(cache, struct block_group_record, cache);
7771         list_del_init(&rec->list);
7772         free(rec);
7773 }
7774
7775 void free_block_group_tree(struct block_group_tree *tree)
7776 {
7777         cache_tree_free_extents(&tree->tree, free_block_group_record);
7778 }
7779
7780 int insert_device_extent_record(struct device_extent_tree *tree,
7781                                 struct device_extent_record *de_rec)
7782 {
7783         int ret;
7784
7785         /*
7786          * Device extent is a bit different from the other extents, because
7787          * the extents which belong to the different devices may have the
7788          * same start and size, so we need use the special extent cache
7789          * search/insert functions.
7790          */
7791         ret = insert_cache_extent2(&tree->tree, &de_rec->cache);
7792         if (ret)
7793                 return ret;
7794
7795         list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans);
7796         list_add_tail(&de_rec->device_list, &tree->no_device_orphans);
7797         return 0;
7798 }
7799
7800 static void free_device_extent_record(struct cache_extent *cache)
7801 {
7802         struct device_extent_record *rec;
7803
7804         rec = container_of(cache, struct device_extent_record, cache);
7805         if (!list_empty(&rec->chunk_list))
7806                 list_del_init(&rec->chunk_list);
7807         if (!list_empty(&rec->device_list))
7808                 list_del_init(&rec->device_list);
7809         free(rec);
7810 }
7811
7812 void free_device_extent_tree(struct device_extent_tree *tree)
7813 {
7814         cache_tree_free_extents(&tree->tree, free_device_extent_record);
7815 }
7816
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Record the backref described by the v0 extent ref item at @slot of @leaf.
 *
 * A ref whose objectid is below BTRFS_FIRST_FREE_OBJECTID is added as a tree
 * backref; any other ref is added as a data backref carrying the ref count
 * stored in the item.  The item key supplies the extent bytenr (objectid) and
 * the parent (offset).  Returns the result of the add_*_backref() call.
 */
static int process_extent_ref_v0(struct cache_tree *extent_cache,
                                 struct extent_buffer *leaf, int slot)
{
        struct btrfs_extent_ref_v0 *ref0;
        struct btrfs_key key;

        btrfs_item_key_to_cpu(leaf, &key, slot);
        ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);

        if (btrfs_ref_objectid_v0(leaf, ref0) >= BTRFS_FIRST_FREE_OBJECTID)
                return add_data_backref(extent_cache, key.objectid, key.offset,
                                0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0);

        return add_tree_backref(extent_cache, key.objectid, key.offset, 0, 0);
}
#endif
7837
7838 struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf,
7839                                             struct btrfs_key *key,
7840                                             int slot)
7841 {
7842         struct btrfs_chunk *ptr;
7843         struct chunk_record *rec;
7844         int num_stripes, i;
7845
7846         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7847         num_stripes = btrfs_chunk_num_stripes(leaf, ptr);
7848
7849         rec = calloc(1, btrfs_chunk_record_size(num_stripes));
7850         if (!rec) {
7851                 fprintf(stderr, "memory allocation failed\n");
7852                 exit(-1);
7853         }
7854
7855         INIT_LIST_HEAD(&rec->list);
7856         INIT_LIST_HEAD(&rec->dextents);
7857         rec->bg_rec = NULL;
7858
7859         rec->cache.start = key->offset;
7860         rec->cache.size = btrfs_chunk_length(leaf, ptr);
7861
7862         rec->generation = btrfs_header_generation(leaf);
7863
7864         rec->objectid = key->objectid;
7865         rec->type = key->type;
7866         rec->offset = key->offset;
7867
7868         rec->length = rec->cache.size;
7869         rec->owner = btrfs_chunk_owner(leaf, ptr);
7870         rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr);
7871         rec->type_flags = btrfs_chunk_type(leaf, ptr);
7872         rec->io_width = btrfs_chunk_io_width(leaf, ptr);
7873         rec->io_align = btrfs_chunk_io_align(leaf, ptr);
7874         rec->sector_size = btrfs_chunk_sector_size(leaf, ptr);
7875         rec->num_stripes = num_stripes;
7876         rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr);
7877
7878         for (i = 0; i < rec->num_stripes; ++i) {
7879                 rec->stripes[i].devid =
7880                         btrfs_stripe_devid_nr(leaf, ptr, i);
7881                 rec->stripes[i].offset =
7882                         btrfs_stripe_offset_nr(leaf, ptr, i);
7883                 read_extent_buffer(leaf, rec->stripes[i].dev_uuid,
7884                                 (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i),
7885                                 BTRFS_UUID_SIZE);
7886         }
7887
7888         return rec;
7889 }
7890
7891 static int process_chunk_item(struct cache_tree *chunk_cache,
7892                               struct btrfs_key *key, struct extent_buffer *eb,
7893                               int slot)
7894 {
7895         struct chunk_record *rec;
7896         struct btrfs_chunk *chunk;
7897         int ret = 0;
7898
7899         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
7900         /*
7901          * Do extra check for this chunk item,
7902          *
7903          * It's still possible one can craft a leaf with CHUNK_ITEM, with
7904          * wrong onwer(3) out of chunk tree, to pass both chunk tree check
7905          * and owner<->key_type check.
7906          */
7907         ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot,
7908                                       key->offset);
7909         if (ret < 0) {
7910                 error("chunk(%llu, %llu) is not valid, ignore it",
7911                       key->offset, btrfs_chunk_length(eb, chunk));
7912                 return 0;
7913         }
7914         rec = btrfs_new_chunk_record(eb, key, slot);
7915         ret = insert_cache_extent(chunk_cache, &rec->cache);
7916         if (ret) {
7917                 fprintf(stderr, "Chunk[%llu, %llu] existed.\n",
7918                         rec->offset, rec->length);
7919                 free(rec);
7920         }
7921
7922         return ret;
7923 }
7924
7925 static int process_device_item(struct rb_root *dev_cache,
7926                 struct btrfs_key *key, struct extent_buffer *eb, int slot)
7927 {
7928         struct btrfs_dev_item *ptr;
7929         struct device_record *rec;
7930         int ret = 0;
7931
7932         ptr = btrfs_item_ptr(eb,
7933                 slot, struct btrfs_dev_item);
7934
7935         rec = malloc(sizeof(*rec));
7936         if (!rec) {
7937                 fprintf(stderr, "memory allocation failed\n");
7938                 return -ENOMEM;
7939         }
7940
7941         rec->devid = key->offset;
7942         rec->generation = btrfs_header_generation(eb);
7943
7944         rec->objectid = key->objectid;
7945         rec->type = key->type;
7946         rec->offset = key->offset;
7947
7948         rec->devid = btrfs_device_id(eb, ptr);
7949         rec->total_byte = btrfs_device_total_bytes(eb, ptr);
7950         rec->byte_used = btrfs_device_bytes_used(eb, ptr);
7951
7952         ret = rb_insert(dev_cache, &rec->node, device_record_compare);
7953         if (ret) {
7954                 fprintf(stderr, "Device[%llu] existed.\n", rec->devid);
7955                 free(rec);
7956         }
7957
7958         return ret;
7959 }
7960
7961 struct block_group_record *
7962 btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key,
7963                              int slot)
7964 {
7965         struct btrfs_block_group_item *ptr;
7966         struct block_group_record *rec;
7967
7968         rec = calloc(1, sizeof(*rec));
7969         if (!rec) {
7970                 fprintf(stderr, "memory allocation failed\n");
7971                 exit(-1);
7972         }
7973
7974         rec->cache.start = key->objectid;
7975         rec->cache.size = key->offset;
7976
7977         rec->generation = btrfs_header_generation(leaf);
7978
7979         rec->objectid = key->objectid;
7980         rec->type = key->type;
7981         rec->offset = key->offset;
7982
7983         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item);
7984         rec->flags = btrfs_disk_block_group_flags(leaf, ptr);
7985
7986         INIT_LIST_HEAD(&rec->list);
7987
7988         return rec;
7989 }
7990
7991 static int process_block_group_item(struct block_group_tree *block_group_cache,
7992                                     struct btrfs_key *key,
7993                                     struct extent_buffer *eb, int slot)
7994 {
7995         struct block_group_record *rec;
7996         int ret = 0;
7997
7998         rec = btrfs_new_block_group_record(eb, key, slot);
7999         ret = insert_block_group_record(block_group_cache, rec);
8000         if (ret) {
8001                 fprintf(stderr, "Block Group[%llu, %llu] existed.\n",
8002                         rec->objectid, rec->offset);
8003                 free(rec);
8004         }
8005
8006         return ret;
8007 }
8008
8009 struct device_extent_record *
8010 btrfs_new_device_extent_record(struct extent_buffer *leaf,
8011                                struct btrfs_key *key, int slot)
8012 {
8013         struct device_extent_record *rec;
8014         struct btrfs_dev_extent *ptr;
8015
8016         rec = calloc(1, sizeof(*rec));
8017         if (!rec) {
8018                 fprintf(stderr, "memory allocation failed\n");
8019                 exit(-1);
8020         }
8021
8022         rec->cache.objectid = key->objectid;
8023         rec->cache.start = key->offset;
8024
8025         rec->generation = btrfs_header_generation(leaf);
8026
8027         rec->objectid = key->objectid;
8028         rec->type = key->type;
8029         rec->offset = key->offset;
8030
8031         ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8032         rec->chunk_objecteid =
8033                 btrfs_dev_extent_chunk_objectid(leaf, ptr);
8034         rec->chunk_offset =
8035                 btrfs_dev_extent_chunk_offset(leaf, ptr);
8036         rec->length = btrfs_dev_extent_length(leaf, ptr);
8037         rec->cache.size = rec->length;
8038
8039         INIT_LIST_HEAD(&rec->chunk_list);
8040         INIT_LIST_HEAD(&rec->device_list);
8041
8042         return rec;
8043 }
8044
8045 static int
8046 process_device_extent_item(struct device_extent_tree *dev_extent_cache,
8047                            struct btrfs_key *key, struct extent_buffer *eb,
8048                            int slot)
8049 {
8050         struct device_extent_record *rec;
8051         int ret;
8052
8053         rec = btrfs_new_device_extent_record(eb, key, slot);
8054         ret = insert_device_extent_record(dev_extent_cache, rec);
8055         if (ret) {
8056                 fprintf(stderr,
8057                         "Device extent[%llu, %llu, %llu] existed.\n",
8058                         rec->objectid, rec->offset, rec->length);
8059                 free(rec);
8060         }
8061
8062         return ret;
8063 }
8064
8065 static int process_extent_item(struct btrfs_root *root,
8066                                struct cache_tree *extent_cache,
8067                                struct extent_buffer *eb, int slot)
8068 {
8069         struct btrfs_extent_item *ei;
8070         struct btrfs_extent_inline_ref *iref;
8071         struct btrfs_extent_data_ref *dref;
8072         struct btrfs_shared_data_ref *sref;
8073         struct btrfs_key key;
8074         struct extent_record tmpl;
8075         unsigned long end;
8076         unsigned long ptr;
8077         int ret;
8078         int type;
8079         u32 item_size = btrfs_item_size_nr(eb, slot);
8080         u64 refs = 0;
8081         u64 offset;
8082         u64 num_bytes;
8083         int metadata = 0;
8084
8085         btrfs_item_key_to_cpu(eb, &key, slot);
8086
8087         if (key.type == BTRFS_METADATA_ITEM_KEY) {
8088                 metadata = 1;
8089                 num_bytes = root->fs_info->nodesize;
8090         } else {
8091                 num_bytes = key.offset;
8092         }
8093
8094         if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) {
8095                 error("ignoring invalid extent, bytenr %llu is not aligned to %u",
8096                       key.objectid, root->fs_info->sectorsize);
8097                 return -EIO;
8098         }
8099         if (item_size < sizeof(*ei)) {
8100 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
8101                 struct btrfs_extent_item_v0 *ei0;
8102                 if (item_size != sizeof(*ei0)) {
8103                         error(
8104         "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d",
8105                                 key.objectid, key.type, key.offset,
8106                                 btrfs_header_bytenr(eb), slot);
8107                         BUG();
8108                 }
8109                 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
8110                 refs = btrfs_extent_refs_v0(eb, ei0);
8111 #else
8112                 BUG();
8113 #endif
8114                 memset(&tmpl, 0, sizeof(tmpl));
8115                 tmpl.start = key.objectid;
8116                 tmpl.nr = num_bytes;
8117                 tmpl.extent_item_refs = refs;
8118                 tmpl.metadata = metadata;
8119                 tmpl.found_rec = 1;
8120                 tmpl.max_size = num_bytes;
8121
8122                 return add_extent_rec(extent_cache, &tmpl);
8123         }
8124
8125         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
8126         refs = btrfs_extent_refs(eb, ei);
8127         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
8128                 metadata = 1;
8129         else
8130                 metadata = 0;
8131         if (metadata && num_bytes != root->fs_info->nodesize) {
8132                 error("ignore invalid metadata extent, length %llu does not equal to %u",
8133                       num_bytes, root->fs_info->nodesize);
8134                 return -EIO;
8135         }
8136         if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) {
8137                 error("ignore invalid data extent, length %llu is not aligned to %u",
8138                       num_bytes, root->fs_info->sectorsize);
8139                 return -EIO;
8140         }
8141
8142         memset(&tmpl, 0, sizeof(tmpl));
8143         tmpl.start = key.objectid;
8144         tmpl.nr = num_bytes;
8145         tmpl.extent_item_refs = refs;
8146         tmpl.metadata = metadata;
8147         tmpl.found_rec = 1;
8148         tmpl.max_size = num_bytes;
8149         add_extent_rec(extent_cache, &tmpl);
8150
8151         ptr = (unsigned long)(ei + 1);
8152         if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
8153             key.type == BTRFS_EXTENT_ITEM_KEY)
8154                 ptr += sizeof(struct btrfs_tree_block_info);
8155
8156         end = (unsigned long)ei + item_size;
8157         while (ptr < end) {
8158                 iref = (struct btrfs_extent_inline_ref *)ptr;
8159                 type = btrfs_extent_inline_ref_type(eb, iref);
8160                 offset = btrfs_extent_inline_ref_offset(eb, iref);
8161                 switch (type) {
8162                 case BTRFS_TREE_BLOCK_REF_KEY:
8163                         ret = add_tree_backref(extent_cache, key.objectid,
8164                                         0, offset, 0);
8165                         if (ret < 0)
8166                                 error(
8167                         "add_tree_backref failed (extent items tree block): %s",
8168                                       strerror(-ret));
8169                         break;
8170                 case BTRFS_SHARED_BLOCK_REF_KEY:
8171                         ret = add_tree_backref(extent_cache, key.objectid,
8172                                         offset, 0, 0);
8173                         if (ret < 0)
8174                                 error(
8175                         "add_tree_backref failed (extent items shared block): %s",
8176                                       strerror(-ret));
8177                         break;
8178                 case BTRFS_EXTENT_DATA_REF_KEY:
8179                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
8180                         add_data_backref(extent_cache, key.objectid, 0,
8181                                         btrfs_extent_data_ref_root(eb, dref),
8182                                         btrfs_extent_data_ref_objectid(eb,
8183                                                                        dref),
8184                                         btrfs_extent_data_ref_offset(eb, dref),
8185                                         btrfs_extent_data_ref_count(eb, dref),
8186                                         0, num_bytes);
8187                         break;
8188                 case BTRFS_SHARED_DATA_REF_KEY:
8189                         sref = (struct btrfs_shared_data_ref *)(iref + 1);
8190                         add_data_backref(extent_cache, key.objectid, offset,
8191                                         0, 0, 0,
8192                                         btrfs_shared_data_ref_count(eb, sref),
8193                                         0, num_bytes);
8194                         break;
8195                 default:
8196                         fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
8197                                 key.objectid, key.type, num_bytes);
8198                         goto out;
8199                 }
8200                 ptr += btrfs_extent_inline_ref_size(type);
8201         }
8202         WARN_ON(ptr > end);
8203 out:
8204         return 0;
8205 }
8206
8207 static int check_cache_range(struct btrfs_root *root,
8208                              struct btrfs_block_group_cache *cache,
8209                              u64 offset, u64 bytes)
8210 {
8211         struct btrfs_free_space *entry;
8212         u64 *logical;
8213         u64 bytenr;
8214         int stripe_len;
8215         int i, nr, ret;
8216
8217         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8218                 bytenr = btrfs_sb_offset(i);
8219                 ret = btrfs_rmap_block(root->fs_info,
8220                                        cache->key.objectid, bytenr, 0,
8221                                        &logical, &nr, &stripe_len);
8222                 if (ret)
8223                         return ret;
8224
8225                 while (nr--) {
8226                         if (logical[nr] + stripe_len <= offset)
8227                                 continue;
8228                         if (offset + bytes <= logical[nr])
8229                                 continue;
8230                         if (logical[nr] == offset) {
8231                                 if (stripe_len >= bytes) {
8232                                         free(logical);
8233                                         return 0;
8234                                 }
8235                                 bytes -= stripe_len;
8236                                 offset += stripe_len;
8237                         } else if (logical[nr] < offset) {
8238                                 if (logical[nr] + stripe_len >=
8239                                     offset + bytes) {
8240                                         free(logical);
8241                                         return 0;
8242                                 }
8243                                 bytes = (offset + bytes) -
8244                                         (logical[nr] + stripe_len);
8245                                 offset = logical[nr] + stripe_len;
8246                         } else {
8247                                 /*
8248                                  * Could be tricky, the super may land in the
8249                                  * middle of the area we're checking.  First
8250                                  * check the easiest case, it's at the end.
8251                                  */
8252                                 if (logical[nr] + stripe_len >=
8253                                     bytes + offset) {
8254                                         bytes = logical[nr] - offset;
8255                                         continue;
8256                                 }
8257
8258                                 /* Check the left side */
8259                                 ret = check_cache_range(root, cache,
8260                                                         offset,
8261                                                         logical[nr] - offset);
8262                                 if (ret) {
8263                                         free(logical);
8264                                         return ret;
8265                                 }
8266
8267                                 /* Now we continue with the right side */
8268                                 bytes = (offset + bytes) -
8269                                         (logical[nr] + stripe_len);
8270                                 offset = logical[nr] + stripe_len;
8271                         }
8272                 }
8273
8274                 free(logical);
8275         }
8276
8277         entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes);
8278         if (!entry) {
8279                 fprintf(stderr, "There is no free space entry for %Lu-%Lu\n",
8280                         offset, offset+bytes);
8281                 return -EINVAL;
8282         }
8283
8284         if (entry->offset != offset) {
8285                 fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset,
8286                         entry->offset);
8287                 return -EINVAL;
8288         }
8289
8290         if (entry->bytes != bytes) {
8291                 fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n",
8292                         bytes, entry->bytes, offset);
8293                 return -EINVAL;
8294         }
8295
8296         unlink_free_space(cache->free_space_ctl, entry);
8297         free(entry);
8298         return 0;
8299 }
8300
8301 static int verify_space_cache(struct btrfs_root *root,
8302                               struct btrfs_block_group_cache *cache)
8303 {
8304         struct btrfs_path path;
8305         struct extent_buffer *leaf;
8306         struct btrfs_key key;
8307         u64 last;
8308         int ret = 0;
8309
8310         root = root->fs_info->extent_root;
8311
8312         last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET);
8313
8314         btrfs_init_path(&path);
8315         key.objectid = last;
8316         key.offset = 0;
8317         key.type = BTRFS_EXTENT_ITEM_KEY;
8318         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8319         if (ret < 0)
8320                 goto out;
8321         ret = 0;
8322         while (1) {
8323                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8324                         ret = btrfs_next_leaf(root, &path);
8325                         if (ret < 0)
8326                                 goto out;
8327                         if (ret > 0) {
8328                                 ret = 0;
8329                                 break;
8330                         }
8331                 }
8332                 leaf = path.nodes[0];
8333                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8334                 if (key.objectid >= cache->key.offset + cache->key.objectid)
8335                         break;
8336                 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
8337                     key.type != BTRFS_METADATA_ITEM_KEY) {
8338                         path.slots[0]++;
8339                         continue;
8340                 }
8341
8342                 if (last == key.objectid) {
8343                         if (key.type == BTRFS_EXTENT_ITEM_KEY)
8344                                 last = key.objectid + key.offset;
8345                         else
8346                                 last = key.objectid + root->fs_info->nodesize;
8347                         path.slots[0]++;
8348                         continue;
8349                 }
8350
8351                 ret = check_cache_range(root, cache, last,
8352                                         key.objectid - last);
8353                 if (ret)
8354                         break;
8355                 if (key.type == BTRFS_EXTENT_ITEM_KEY)
8356                         last = key.objectid + key.offset;
8357                 else
8358                         last = key.objectid + root->fs_info->nodesize;
8359                 path.slots[0]++;
8360         }
8361
8362         if (last < cache->key.objectid + cache->key.offset)
8363                 ret = check_cache_range(root, cache, last,
8364                                         cache->key.objectid +
8365                                         cache->key.offset - last);
8366
8367 out:
8368         btrfs_release_path(&path);
8369
8370         if (!ret &&
8371             !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) {
8372                 fprintf(stderr, "There are still entries left in the space "
8373                         "cache\n");
8374                 ret = -EINVAL;
8375         }
8376
8377         return ret;
8378 }
8379
8380 static int check_space_cache(struct btrfs_root *root)
8381 {
8382         struct btrfs_block_group_cache *cache;
8383         u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE;
8384         int ret;
8385         int error = 0;
8386
8387         if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL &&
8388             btrfs_super_generation(root->fs_info->super_copy) !=
8389             btrfs_super_cache_generation(root->fs_info->super_copy)) {
8390                 printf("cache and super generation don't match, space cache "
8391                        "will be invalidated\n");
8392                 return 0;
8393         }
8394
8395         if (ctx.progress_enabled) {
8396                 ctx.tp = TASK_FREE_SPACE;
8397                 task_start(ctx.info);
8398         }
8399
8400         while (1) {
8401                 cache = btrfs_lookup_first_block_group(root->fs_info, start);
8402                 if (!cache)
8403                         break;
8404
8405                 start = cache->key.objectid + cache->key.offset;
8406                 if (!cache->free_space_ctl) {
8407                         if (btrfs_init_free_space_ctl(cache,
8408                                                 root->fs_info->sectorsize)) {
8409                                 ret = -ENOMEM;
8410                                 break;
8411                         }
8412                 } else {
8413                         btrfs_remove_free_space_cache(cache);
8414                 }
8415
8416                 if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) {
8417                         ret = exclude_super_stripes(root, cache);
8418                         if (ret) {
8419                                 fprintf(stderr, "could not exclude super stripes: %s\n",
8420                                         strerror(-ret));
8421                                 error++;
8422                                 continue;
8423                         }
8424                         ret = load_free_space_tree(root->fs_info, cache);
8425                         free_excluded_extents(root, cache);
8426                         if (ret < 0) {
8427                                 fprintf(stderr, "could not load free space tree: %s\n",
8428                                         strerror(-ret));
8429                                 error++;
8430                                 continue;
8431                         }
8432                         error += ret;
8433                 } else {
8434                         ret = load_free_space_cache(root->fs_info, cache);
8435                         if (!ret)
8436                                 continue;
8437                 }
8438
8439                 ret = verify_space_cache(root, cache);
8440                 if (ret) {
8441                         fprintf(stderr, "cache appears valid but isn't %Lu\n",
8442                                 cache->key.objectid);
8443                         error++;
8444                 }
8445         }
8446
8447         task_stop(ctx.info);
8448
8449         return error ? -EINVAL : 0;
8450 }
8451
8452 static int check_extent_csums(struct btrfs_root *root, u64 bytenr,
8453                         u64 num_bytes, unsigned long leaf_offset,
8454                         struct extent_buffer *eb) {
8455
8456         struct btrfs_fs_info *fs_info = root->fs_info;
8457         u64 offset = 0;
8458         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
8459         char *data;
8460         unsigned long csum_offset;
8461         u32 csum;
8462         u32 csum_expected;
8463         u64 read_len;
8464         u64 data_checked = 0;
8465         u64 tmp;
8466         int ret = 0;
8467         int mirror;
8468         int num_copies;
8469
8470         if (num_bytes % fs_info->sectorsize)
8471                 return -EINVAL;
8472
8473         data = malloc(num_bytes);
8474         if (!data)
8475                 return -ENOMEM;
8476
8477         while (offset < num_bytes) {
8478                 mirror = 0;
8479 again:
8480                 read_len = num_bytes - offset;
8481                 /* read as much space once a time */
8482                 ret = read_extent_data(fs_info, data + offset,
8483                                 bytenr + offset, &read_len, mirror);
8484                 if (ret)
8485                         goto out;
8486                 data_checked = 0;
8487                 /* verify every 4k data's checksum */
8488                 while (data_checked < read_len) {
8489                         csum = ~(u32)0;
8490                         tmp = offset + data_checked;
8491
8492                         csum = btrfs_csum_data((char *)data + tmp,
8493                                                csum, fs_info->sectorsize);
8494                         btrfs_csum_final(csum, (u8 *)&csum);
8495
8496                         csum_offset = leaf_offset +
8497                                  tmp / fs_info->sectorsize * csum_size;
8498                         read_extent_buffer(eb, (char *)&csum_expected,
8499                                            csum_offset, csum_size);
8500                         /* try another mirror */
8501                         if (csum != csum_expected) {
8502                                 fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n",
8503                                                 mirror, bytenr + tmp,
8504                                                 csum, csum_expected);
8505                                 num_copies = btrfs_num_copies(root->fs_info,
8506                                                 bytenr, num_bytes);
8507                                 if (mirror < num_copies - 1) {
8508                                         mirror += 1;
8509                                         goto again;
8510                                 }
8511                         }
8512                         data_checked += fs_info->sectorsize;
8513                 }
8514                 offset += read_len;
8515         }
8516 out:
8517         free(data);
8518         return ret;
8519 }
8520
8521 static int check_extent_exists(struct btrfs_root *root, u64 bytenr,
8522                                u64 num_bytes)
8523 {
8524         struct btrfs_path path;
8525         struct extent_buffer *leaf;
8526         struct btrfs_key key;
8527         int ret;
8528
8529         btrfs_init_path(&path);
8530         key.objectid = bytenr;
8531         key.type = BTRFS_EXTENT_ITEM_KEY;
8532         key.offset = (u64)-1;
8533
8534 again:
8535         ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path,
8536                                 0, 0);
8537         if (ret < 0) {
8538                 fprintf(stderr, "Error looking up extent record %d\n", ret);
8539                 btrfs_release_path(&path);
8540                 return ret;
8541         } else if (ret) {
8542                 if (path.slots[0] > 0) {
8543                         path.slots[0]--;
8544                 } else {
8545                         ret = btrfs_prev_leaf(root, &path);
8546                         if (ret < 0) {
8547                                 goto out;
8548                         } else if (ret > 0) {
8549                                 ret = 0;
8550                                 goto out;
8551                         }
8552                 }
8553         }
8554
8555         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8556
8557         /*
8558          * Block group items come before extent items if they have the same
8559          * bytenr, so walk back one more just in case.  Dear future traveller,
8560          * first congrats on mastering time travel.  Now if it's not too much
8561          * trouble could you go back to 2006 and tell Chris to make the
8562          * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the
8563          * EXTENT_ITEM_KEY please?
8564          */
8565         while (key.type > BTRFS_EXTENT_ITEM_KEY) {
8566                 if (path.slots[0] > 0) {
8567                         path.slots[0]--;
8568                 } else {
8569                         ret = btrfs_prev_leaf(root, &path);
8570                         if (ret < 0) {
8571                                 goto out;
8572                         } else if (ret > 0) {
8573                                 ret = 0;
8574                                 goto out;
8575                         }
8576                 }
8577                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
8578         }
8579
8580         while (num_bytes) {
8581                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8582                         ret = btrfs_next_leaf(root, &path);
8583                         if (ret < 0) {
8584                                 fprintf(stderr, "Error going to next leaf "
8585                                         "%d\n", ret);
8586                                 btrfs_release_path(&path);
8587                                 return ret;
8588                         } else if (ret) {
8589                                 break;
8590                         }
8591                 }
8592                 leaf = path.nodes[0];
8593                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8594                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
8595                         path.slots[0]++;
8596                         continue;
8597                 }
8598                 if (key.objectid + key.offset < bytenr) {
8599                         path.slots[0]++;
8600                         continue;
8601                 }
8602                 if (key.objectid > bytenr + num_bytes)
8603                         break;
8604
8605                 if (key.objectid == bytenr) {
8606                         if (key.offset >= num_bytes) {
8607                                 num_bytes = 0;
8608                                 break;
8609                         }
8610                         num_bytes -= key.offset;
8611                         bytenr += key.offset;
8612                 } else if (key.objectid < bytenr) {
8613                         if (key.objectid + key.offset >= bytenr + num_bytes) {
8614                                 num_bytes = 0;
8615                                 break;
8616                         }
8617                         num_bytes = (bytenr + num_bytes) -
8618                                 (key.objectid + key.offset);
8619                         bytenr = key.objectid + key.offset;
8620                 } else {
8621                         if (key.objectid + key.offset < bytenr + num_bytes) {
8622                                 u64 new_start = key.objectid + key.offset;
8623                                 u64 new_bytes = bytenr + num_bytes - new_start;
8624
8625                                 /*
8626                                  * Weird case, the extent is in the middle of
8627                                  * our range, we'll have to search one side
8628                                  * and then the other.  Not sure if this happens
8629                                  * in real life, but no harm in coding it up
8630                                  * anyway just in case.
8631                                  */
8632                                 btrfs_release_path(&path);
8633                                 ret = check_extent_exists(root, new_start,
8634                                                           new_bytes);
8635                                 if (ret) {
8636                                         fprintf(stderr, "Right section didn't "
8637                                                 "have a record\n");
8638                                         break;
8639                                 }
8640                                 num_bytes = key.objectid - bytenr;
8641                                 goto again;
8642                         }
8643                         num_bytes = key.objectid - bytenr;
8644                 }
8645                 path.slots[0]++;
8646         }
8647         ret = 0;
8648
8649 out:
8650         if (num_bytes && !ret) {
8651                 fprintf(stderr, "There are no extents for csum range "
8652                         "%Lu-%Lu\n", bytenr, bytenr+num_bytes);
8653                 ret = 1;
8654         }
8655
8656         btrfs_release_path(&path);
8657         return ret;
8658 }
8659
8660 static int check_csums(struct btrfs_root *root)
8661 {
8662         struct btrfs_path path;
8663         struct extent_buffer *leaf;
8664         struct btrfs_key key;
8665         u64 offset = 0, num_bytes = 0;
8666         u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
8667         int errors = 0;
8668         int ret;
8669         u64 data_len;
8670         unsigned long leaf_offset;
8671
8672         root = root->fs_info->csum_root;
8673         if (!extent_buffer_uptodate(root->node)) {
8674                 fprintf(stderr, "No valid csum tree found\n");
8675                 return -ENOENT;
8676         }
8677
8678         btrfs_init_path(&path);
8679         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
8680         key.type = BTRFS_EXTENT_CSUM_KEY;
8681         key.offset = 0;
8682         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
8683         if (ret < 0) {
8684                 fprintf(stderr, "Error searching csum tree %d\n", ret);
8685                 btrfs_release_path(&path);
8686                 return ret;
8687         }
8688
8689         if (ret > 0 && path.slots[0])
8690                 path.slots[0]--;
8691         ret = 0;
8692
8693         while (1) {
8694                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
8695                         ret = btrfs_next_leaf(root, &path);
8696                         if (ret < 0) {
8697                                 fprintf(stderr, "Error going to next leaf "
8698                                         "%d\n", ret);
8699                                 break;
8700                         }
8701                         if (ret)
8702                                 break;
8703                 }
8704                 leaf = path.nodes[0];
8705
8706                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
8707                 if (key.type != BTRFS_EXTENT_CSUM_KEY) {
8708                         path.slots[0]++;
8709                         continue;
8710                 }
8711
8712                 data_len = (btrfs_item_size_nr(leaf, path.slots[0]) /
8713                               csum_size) * root->fs_info->sectorsize;
8714                 if (!check_data_csum)
8715                         goto skip_csum_check;
8716                 leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
8717                 ret = check_extent_csums(root, key.offset, data_len,
8718                                          leaf_offset, leaf);
8719                 if (ret)
8720                         break;
8721 skip_csum_check:
8722                 if (!num_bytes) {
8723                         offset = key.offset;
8724                 } else if (key.offset != offset + num_bytes) {
8725                         ret = check_extent_exists(root, offset, num_bytes);
8726                         if (ret) {
8727                                 fprintf(stderr, "Csum exists for %Lu-%Lu but "
8728                                         "there is no extent record\n",
8729                                         offset, offset+num_bytes);
8730                                 errors++;
8731                         }
8732                         offset = key.offset;
8733                         num_bytes = 0;
8734                 }
8735                 num_bytes += data_len;
8736                 path.slots[0]++;
8737         }
8738
8739         btrfs_release_path(&path);
8740         return errors;
8741 }
8742
8743 static int is_dropped_key(struct btrfs_key *key,
8744                           struct btrfs_key *drop_key) {
8745         if (key->objectid < drop_key->objectid)
8746                 return 1;
8747         else if (key->objectid == drop_key->objectid) {
8748                 if (key->type < drop_key->type)
8749                         return 1;
8750                 else if (key->type == drop_key->type) {
8751                         if (key->offset < drop_key->offset)
8752                                 return 1;
8753                 }
8754         }
8755         return 0;
8756 }
8757
8758 /*
8759  * Here are the rules for FULL_BACKREF.
8760  *
8761  * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set.
8762  * 2) If btrfs_header_owner(buf) no longer points to buf then we have
8763  *      FULL_BACKREF set.
8764  * 3) We cowed the block walking down a reloc tree.  This is impossible to tell
8765  *    if it happened after the relocation occurred since we'll have dropped the
8766  *    reloc root, so it's entirely possible to have FULL_BACKREF set on buf and
8767  *    have no real way to know for sure.
8768  *
8769  * We process the blocks one root at a time, and we start from the lowest root
8770  * objectid and go to the highest.  So we can just lookup the owner backref for
8771  * the record and if we don't find it then we know it doesn't exist and we have
8772  * a FULL BACKREF.
8773  *
8774  * FIXME: if we ever start reclaiming root objectid's then we need to fix this
8775  * assumption and simply indicate that we _think_ that the FULL BACKREF needs to
8776  * be set or not and then we can check later once we've gathered all the refs.
8777  */
8778 static int calc_extent_flag(struct cache_tree *extent_cache,
8779                            struct extent_buffer *buf,
8780                            struct root_item_record *ri,
8781                            u64 *flags)
8782 {
8783         struct extent_record *rec;
8784         struct cache_extent *cache;
8785         struct tree_backref *tback;
8786         u64 owner = 0;
8787
8788         cache = lookup_cache_extent(extent_cache, buf->start, 1);
8789         /* we have added this extent before */
8790         if (!cache)
8791                 return -ENOENT;
8792
8793         rec = container_of(cache, struct extent_record, cache);
8794
8795         /*
8796          * Except file/reloc tree, we can not have
8797          * FULL BACKREF MODE
8798          */
8799         if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID)
8800                 goto normal;
8801         /*
8802          * root node
8803          */
8804         if (buf->start == ri->bytenr)
8805                 goto normal;
8806
8807         if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
8808                 goto full_backref;
8809
8810         owner = btrfs_header_owner(buf);
8811         if (owner == ri->objectid)
8812                 goto normal;
8813
8814         tback = find_tree_backref(rec, 0, owner);
8815         if (!tback)
8816                 goto full_backref;
8817 normal:
8818         *flags = 0;
8819         if (rec->flag_block_full_backref != FLAG_UNSET &&
8820             rec->flag_block_full_backref != 0)
8821                 rec->bad_full_backref = 1;
8822         return 0;
8823 full_backref:
8824         *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8825         if (rec->flag_block_full_backref != FLAG_UNSET &&
8826             rec->flag_block_full_backref != 1)
8827                 rec->bad_full_backref = 1;
8828         return 0;
8829 }
8830
8831 static void report_mismatch_key_root(u8 key_type, u64 rootid)
8832 {
8833         fprintf(stderr, "Invalid key type(");
8834         print_key_type(stderr, 0, key_type);
8835         fprintf(stderr, ") found in root(");
8836         print_objectid(stderr, rootid, 0);
8837         fprintf(stderr, ")\n");
8838 }
8839
/*
 * Check if the key is valid with its extent buffer.
 *
 * This is an early check in case an invalid key exists in an extent buffer.
 * This is not comprehensive yet, but should prevent a wrong key/item from
 * being passed further.
 */
8847 static int check_type_with_root(u64 rootid, u8 key_type)
8848 {
8849         switch (key_type) {
8850         /* Only valid in chunk tree */
8851         case BTRFS_DEV_ITEM_KEY:
8852         case BTRFS_CHUNK_ITEM_KEY:
8853                 if (rootid != BTRFS_CHUNK_TREE_OBJECTID)
8854                         goto err;
8855                 break;
8856         /* valid in csum and log tree */
8857         case BTRFS_CSUM_TREE_OBJECTID:
8858                 if (!(rootid == BTRFS_TREE_LOG_OBJECTID ||
8859                       is_fstree(rootid)))
8860                         goto err;
8861                 break;
8862         case BTRFS_EXTENT_ITEM_KEY:
8863         case BTRFS_METADATA_ITEM_KEY:
8864         case BTRFS_BLOCK_GROUP_ITEM_KEY:
8865                 if (rootid != BTRFS_EXTENT_TREE_OBJECTID)
8866                         goto err;
8867                 break;
8868         case BTRFS_ROOT_ITEM_KEY:
8869                 if (rootid != BTRFS_ROOT_TREE_OBJECTID)
8870                         goto err;
8871                 break;
8872         case BTRFS_DEV_EXTENT_KEY:
8873                 if (rootid != BTRFS_DEV_TREE_OBJECTID)
8874                         goto err;
8875                 break;
8876         }
8877         return 0;
8878 err:
8879         report_mismatch_key_root(key_type, rootid);
8880         return -EINVAL;
8881 }
8882
8883 static int run_next_block(struct btrfs_root *root,
8884                           struct block_info *bits,
8885                           int bits_nr,
8886                           u64 *last,
8887                           struct cache_tree *pending,
8888                           struct cache_tree *seen,
8889                           struct cache_tree *reada,
8890                           struct cache_tree *nodes,
8891                           struct cache_tree *extent_cache,
8892                           struct cache_tree *chunk_cache,
8893                           struct rb_root *dev_cache,
8894                           struct block_group_tree *block_group_cache,
8895                           struct device_extent_tree *dev_extent_cache,
8896                           struct root_item_record *ri)
8897 {
8898         struct btrfs_fs_info *fs_info = root->fs_info;
8899         struct extent_buffer *buf;
8900         struct extent_record *rec = NULL;
8901         u64 bytenr;
8902         u32 size;
8903         u64 parent;
8904         u64 owner;
8905         u64 flags;
8906         u64 ptr;
8907         u64 gen = 0;
8908         int ret = 0;
8909         int i;
8910         int nritems;
8911         struct btrfs_key key;
8912         struct cache_extent *cache;
8913         int reada_bits;
8914
8915         nritems = pick_next_pending(pending, reada, nodes, *last, bits,
8916                                     bits_nr, &reada_bits);
8917         if (nritems == 0)
8918                 return 1;
8919
8920         if (!reada_bits) {
8921                 for(i = 0; i < nritems; i++) {
8922                         ret = add_cache_extent(reada, bits[i].start,
8923                                                bits[i].size);
8924                         if (ret == -EEXIST)
8925                                 continue;
8926
8927                         /* fixme, get the parent transid */
8928                         readahead_tree_block(fs_info, bits[i].start, 0);
8929                 }
8930         }
8931         *last = bits[0].start;
8932         bytenr = bits[0].start;
8933         size = bits[0].size;
8934
8935         cache = lookup_cache_extent(pending, bytenr, size);
8936         if (cache) {
8937                 remove_cache_extent(pending, cache);
8938                 free(cache);
8939         }
8940         cache = lookup_cache_extent(reada, bytenr, size);
8941         if (cache) {
8942                 remove_cache_extent(reada, cache);
8943                 free(cache);
8944         }
8945         cache = lookup_cache_extent(nodes, bytenr, size);
8946         if (cache) {
8947                 remove_cache_extent(nodes, cache);
8948                 free(cache);
8949         }
8950         cache = lookup_cache_extent(extent_cache, bytenr, size);
8951         if (cache) {
8952                 rec = container_of(cache, struct extent_record, cache);
8953                 gen = rec->parent_generation;
8954         }
8955
8956         /* fixme, get the real parent transid */
8957         buf = read_tree_block(root->fs_info, bytenr, gen);
8958         if (!extent_buffer_uptodate(buf)) {
8959                 record_bad_block_io(root->fs_info,
8960                                     extent_cache, bytenr, size);
8961                 goto out;
8962         }
8963
8964         nritems = btrfs_header_nritems(buf);
8965
8966         flags = 0;
8967         if (!init_extent_tree) {
8968                 ret = btrfs_lookup_extent_info(NULL, root, bytenr,
8969                                        btrfs_header_level(buf), 1, NULL,
8970                                        &flags);
8971                 if (ret < 0) {
8972                         ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8973                         if (ret < 0) {
8974                                 fprintf(stderr, "Couldn't calc extent flags\n");
8975                                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8976                         }
8977                 }
8978         } else {
8979                 flags = 0;
8980                 ret = calc_extent_flag(extent_cache, buf, ri, &flags);
8981                 if (ret < 0) {
8982                         fprintf(stderr, "Couldn't calc extent flags\n");
8983                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
8984                 }
8985         }
8986
8987         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
8988                 if (ri != NULL &&
8989                     ri->objectid != BTRFS_TREE_RELOC_OBJECTID &&
8990                     ri->objectid == btrfs_header_owner(buf)) {
8991                         /*
8992                          * Ok we got to this block from it's original owner and
8993                          * we have FULL_BACKREF set.  Relocation can leave
8994                          * converted blocks over so this is altogether possible,
8995                          * however it's not possible if the generation > the
8996                          * last snapshot, so check for this case.
8997                          */
8998                         if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) &&
8999                             btrfs_header_generation(buf) > ri->last_snapshot) {
9000                                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
9001                                 rec->bad_full_backref = 1;
9002                         }
9003                 }
9004         } else {
9005                 if (ri != NULL &&
9006                     (ri->objectid == BTRFS_TREE_RELOC_OBJECTID ||
9007                      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
9008                         flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9009                         rec->bad_full_backref = 1;
9010                 }
9011         }
9012
9013         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9014                 rec->flag_block_full_backref = 1;
9015                 parent = bytenr;
9016                 owner = 0;
9017         } else {
9018                 rec->flag_block_full_backref = 0;
9019                 parent = 0;
9020                 owner = btrfs_header_owner(buf);
9021         }
9022
9023         ret = check_block(root, extent_cache, buf, flags);
9024         if (ret)
9025                 goto out;
9026
9027         if (btrfs_is_leaf(buf)) {
9028                 btree_space_waste += btrfs_leaf_free_space(root, buf);
9029                 for (i = 0; i < nritems; i++) {
9030                         struct btrfs_file_extent_item *fi;
9031                         btrfs_item_key_to_cpu(buf, &key, i);
9032                         /*
9033                          * Check key type against the leaf owner.
9034                          * Could filter quite a lot of early error if
9035                          * owner is correct
9036                          */
9037                         if (check_type_with_root(btrfs_header_owner(buf),
9038                                                  key.type)) {
9039                                 fprintf(stderr, "ignoring invalid key\n");
9040                                 continue;
9041                         }
9042                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
9043                                 process_extent_item(root, extent_cache, buf,
9044                                                     i);
9045                                 continue;
9046                         }
9047                         if (key.type == BTRFS_METADATA_ITEM_KEY) {
9048                                 process_extent_item(root, extent_cache, buf,
9049                                                     i);
9050                                 continue;
9051                         }
9052                         if (key.type == BTRFS_EXTENT_CSUM_KEY) {
9053                                 total_csum_bytes +=
9054                                         btrfs_item_size_nr(buf, i);
9055                                 continue;
9056                         }
9057                         if (key.type == BTRFS_CHUNK_ITEM_KEY) {
9058                                 process_chunk_item(chunk_cache, &key, buf, i);
9059                                 continue;
9060                         }
9061                         if (key.type == BTRFS_DEV_ITEM_KEY) {
9062                                 process_device_item(dev_cache, &key, buf, i);
9063                                 continue;
9064                         }
9065                         if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
9066                                 process_block_group_item(block_group_cache,
9067                                         &key, buf, i);
9068                                 continue;
9069                         }
9070                         if (key.type == BTRFS_DEV_EXTENT_KEY) {
9071                                 process_device_extent_item(dev_extent_cache,
9072                                         &key, buf, i);
9073                                 continue;
9074
9075                         }
9076                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
9077 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
9078                                 process_extent_ref_v0(extent_cache, buf, i);
9079 #else
9080                                 BUG();
9081 #endif
9082                                 continue;
9083                         }
9084
9085                         if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
9086                                 ret = add_tree_backref(extent_cache,
9087                                                 key.objectid, 0, key.offset, 0);
9088                                 if (ret < 0)
9089                                         error(
9090                                 "add_tree_backref failed (leaf tree block): %s",
9091                                               strerror(-ret));
9092                                 continue;
9093                         }
9094                         if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
9095                                 ret = add_tree_backref(extent_cache,
9096                                                 key.objectid, key.offset, 0, 0);
9097                                 if (ret < 0)
9098                                         error(
9099                                 "add_tree_backref failed (leaf shared block): %s",
9100                                               strerror(-ret));
9101                                 continue;
9102                         }
9103                         if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
9104                                 struct btrfs_extent_data_ref *ref;
9105                                 ref = btrfs_item_ptr(buf, i,
9106                                                 struct btrfs_extent_data_ref);
9107                                 add_data_backref(extent_cache,
9108                                         key.objectid, 0,
9109                                         btrfs_extent_data_ref_root(buf, ref),
9110                                         btrfs_extent_data_ref_objectid(buf,
9111                                                                        ref),
9112                                         btrfs_extent_data_ref_offset(buf, ref),
9113                                         btrfs_extent_data_ref_count(buf, ref),
9114                                         0, root->fs_info->sectorsize);
9115                                 continue;
9116                         }
9117                         if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
9118                                 struct btrfs_shared_data_ref *ref;
9119                                 ref = btrfs_item_ptr(buf, i,
9120                                                 struct btrfs_shared_data_ref);
9121                                 add_data_backref(extent_cache,
9122                                         key.objectid, key.offset, 0, 0, 0,
9123                                         btrfs_shared_data_ref_count(buf, ref),
9124                                         0, root->fs_info->sectorsize);
9125                                 continue;
9126                         }
9127                         if (key.type == BTRFS_ORPHAN_ITEM_KEY) {
9128                                 struct bad_item *bad;
9129
9130                                 if (key.objectid == BTRFS_ORPHAN_OBJECTID)
9131                                         continue;
9132                                 if (!owner)
9133                                         continue;
9134                                 bad = malloc(sizeof(struct bad_item));
9135                                 if (!bad)
9136                                         continue;
9137                                 INIT_LIST_HEAD(&bad->list);
9138                                 memcpy(&bad->key, &key,
9139                                        sizeof(struct btrfs_key));
9140                                 bad->root_id = owner;
9141                                 list_add_tail(&bad->list, &delete_items);
9142                                 continue;
9143                         }
9144                         if (key.type != BTRFS_EXTENT_DATA_KEY)
9145                                 continue;
9146                         fi = btrfs_item_ptr(buf, i,
9147                                             struct btrfs_file_extent_item);
9148                         if (btrfs_file_extent_type(buf, fi) ==
9149                             BTRFS_FILE_EXTENT_INLINE)
9150                                 continue;
9151                         if (btrfs_file_extent_disk_bytenr(buf, fi) == 0)
9152                                 continue;
9153
9154                         data_bytes_allocated +=
9155                                 btrfs_file_extent_disk_num_bytes(buf, fi);
9156                         if (data_bytes_allocated < root->fs_info->sectorsize) {
9157                                 abort();
9158                         }
9159                         data_bytes_referenced +=
9160                                 btrfs_file_extent_num_bytes(buf, fi);
9161                         add_data_backref(extent_cache,
9162                                 btrfs_file_extent_disk_bytenr(buf, fi),
9163                                 parent, owner, key.objectid, key.offset -
9164                                 btrfs_file_extent_offset(buf, fi), 1, 1,
9165                                 btrfs_file_extent_disk_num_bytes(buf, fi));
9166                 }
9167         } else {
9168                 int level;
9169                 struct btrfs_key first_key;
9170
9171                 first_key.objectid = 0;
9172
9173                 if (nritems > 0)
9174                         btrfs_item_key_to_cpu(buf, &first_key, 0);
9175                 level = btrfs_header_level(buf);
9176                 for (i = 0; i < nritems; i++) {
9177                         struct extent_record tmpl;
9178
9179                         ptr = btrfs_node_blockptr(buf, i);
9180                         size = root->fs_info->nodesize;
9181                         btrfs_node_key_to_cpu(buf, &key, i);
9182                         if (ri != NULL) {
9183                                 if ((level == ri->drop_level)
9184                                     && is_dropped_key(&key, &ri->drop_key)) {
9185                                         continue;
9186                                 }
9187                         }
9188
9189                         memset(&tmpl, 0, sizeof(tmpl));
9190                         btrfs_cpu_key_to_disk(&tmpl.parent_key, &key);
9191                         tmpl.parent_generation = btrfs_node_ptr_generation(buf, i);
9192                         tmpl.start = ptr;
9193                         tmpl.nr = size;
9194                         tmpl.refs = 1;
9195                         tmpl.metadata = 1;
9196                         tmpl.max_size = size;
9197                         ret = add_extent_rec(extent_cache, &tmpl);
9198                         if (ret < 0)
9199                                 goto out;
9200
9201                         ret = add_tree_backref(extent_cache, ptr, parent,
9202                                         owner, 1);
9203                         if (ret < 0) {
9204                                 error(
9205                                 "add_tree_backref failed (non-leaf block): %s",
9206                                       strerror(-ret));
9207                                 continue;
9208                         }
9209
9210                         if (level > 1) {
9211                                 add_pending(nodes, seen, ptr, size);
9212                         } else {
9213                                 add_pending(pending, seen, ptr, size);
9214                         }
9215                 }
9216                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) -
9217                                       nritems) * sizeof(struct btrfs_key_ptr);
9218         }
9219         total_btree_bytes += buf->len;
9220         if (fs_root_objectid(btrfs_header_owner(buf)))
9221                 total_fs_tree_bytes += buf->len;
9222         if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
9223                 total_extent_tree_bytes += buf->len;
9224 out:
9225         free_extent_buffer(buf);
9226         return ret;
9227 }
9228
9229 static int add_root_to_pending(struct extent_buffer *buf,
9230                                struct cache_tree *extent_cache,
9231                                struct cache_tree *pending,
9232                                struct cache_tree *seen,
9233                                struct cache_tree *nodes,
9234                                u64 objectid)
9235 {
9236         struct extent_record tmpl;
9237         int ret;
9238
9239         if (btrfs_header_level(buf) > 0)
9240                 add_pending(nodes, seen, buf->start, buf->len);
9241         else
9242                 add_pending(pending, seen, buf->start, buf->len);
9243
9244         memset(&tmpl, 0, sizeof(tmpl));
9245         tmpl.start = buf->start;
9246         tmpl.nr = buf->len;
9247         tmpl.is_root = 1;
9248         tmpl.refs = 1;
9249         tmpl.metadata = 1;
9250         tmpl.max_size = buf->len;
9251         add_extent_rec(extent_cache, &tmpl);
9252
9253         if (objectid == BTRFS_TREE_RELOC_OBJECTID ||
9254             btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
9255                 ret = add_tree_backref(extent_cache, buf->start, buf->start,
9256                                 0, 1);
9257         else
9258                 ret = add_tree_backref(extent_cache, buf->start, 0, objectid,
9259                                 1);
9260         return ret;
9261 }
9262
9263 /* as we fix the tree, we might be deleting blocks that
9264  * we're tracking for repair.  This hook makes sure we
9265  * remove any backrefs for blocks as we are fixing them.
9266  */
9267 static int free_extent_hook(struct btrfs_trans_handle *trans,
9268                             struct btrfs_root *root,
9269                             u64 bytenr, u64 num_bytes, u64 parent,
9270                             u64 root_objectid, u64 owner, u64 offset,
9271                             int refs_to_drop)
9272 {
9273         struct extent_record *rec;
9274         struct cache_extent *cache;
9275         int is_data;
9276         struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache;
9277
9278         is_data = owner >= BTRFS_FIRST_FREE_OBJECTID;
9279         cache = lookup_cache_extent(extent_cache, bytenr, num_bytes);
9280         if (!cache)
9281                 return 0;
9282
9283         rec = container_of(cache, struct extent_record, cache);
9284         if (is_data) {
9285                 struct data_backref *back;
9286                 back = find_data_backref(rec, parent, root_objectid, owner,
9287                                          offset, 1, bytenr, num_bytes);
9288                 if (!back)
9289                         goto out;
9290                 if (back->node.found_ref) {
9291                         back->found_ref -= refs_to_drop;
9292                         if (rec->refs)
9293                                 rec->refs -= refs_to_drop;
9294                 }
9295                 if (back->node.found_extent_tree) {
9296                         back->num_refs -= refs_to_drop;
9297                         if (rec->extent_item_refs)
9298                                 rec->extent_item_refs -= refs_to_drop;
9299                 }
9300                 if (back->found_ref == 0)
9301                         back->node.found_ref = 0;
9302                 if (back->num_refs == 0)
9303                         back->node.found_extent_tree = 0;
9304
9305                 if (!back->node.found_extent_tree && back->node.found_ref) {
9306                         rb_erase(&back->node.node, &rec->backref_tree);
9307                         free(back);
9308                 }
9309         } else {
9310                 struct tree_backref *back;
9311                 back = find_tree_backref(rec, parent, root_objectid);
9312                 if (!back)
9313                         goto out;
9314                 if (back->node.found_ref) {
9315                         if (rec->refs)
9316                                 rec->refs--;
9317                         back->node.found_ref = 0;
9318                 }
9319                 if (back->node.found_extent_tree) {
9320                         if (rec->extent_item_refs)
9321                                 rec->extent_item_refs--;
9322                         back->node.found_extent_tree = 0;
9323                 }
9324                 if (!back->node.found_extent_tree && back->node.found_ref) {
9325                         rb_erase(&back->node.node, &rec->backref_tree);
9326                         free(back);
9327                 }
9328         }
9329         maybe_free_extent_rec(extent_cache, rec);
9330 out:
9331         return 0;
9332 }
9333
9334 static int delete_extent_records(struct btrfs_trans_handle *trans,
9335                                  struct btrfs_root *root,
9336                                  struct btrfs_path *path,
9337                                  u64 bytenr)
9338 {
9339         struct btrfs_key key;
9340         struct btrfs_key found_key;
9341         struct extent_buffer *leaf;
9342         int ret;
9343         int slot;
9344
9345
9346         key.objectid = bytenr;
9347         key.type = (u8)-1;
9348         key.offset = (u64)-1;
9349
9350         while(1) {
9351                 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
9352                                         &key, path, 0, 1);
9353                 if (ret < 0)
9354                         break;
9355
9356                 if (ret > 0) {
9357                         ret = 0;
9358                         if (path->slots[0] == 0)
9359                                 break;
9360                         path->slots[0]--;
9361                 }
9362                 ret = 0;
9363
9364                 leaf = path->nodes[0];
9365                 slot = path->slots[0];
9366
9367                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
9368                 if (found_key.objectid != bytenr)
9369                         break;
9370
9371                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
9372                     found_key.type != BTRFS_METADATA_ITEM_KEY &&
9373                     found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
9374                     found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
9375                     found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
9376                     found_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
9377                     found_key.type != BTRFS_SHARED_DATA_REF_KEY) {
9378                         btrfs_release_path(path);
9379                         if (found_key.type == 0) {
9380                                 if (found_key.offset == 0)
9381                                         break;
9382                                 key.offset = found_key.offset - 1;
9383                                 key.type = found_key.type;
9384                         }
9385                         key.type = found_key.type - 1;
9386                         key.offset = (u64)-1;
9387                         continue;
9388                 }
9389
9390                 fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n",
9391                         found_key.objectid, found_key.type, found_key.offset);
9392
9393                 ret = btrfs_del_item(trans, root->fs_info->extent_root, path);
9394                 if (ret)
9395                         break;
9396                 btrfs_release_path(path);
9397
9398                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
9399                     found_key.type == BTRFS_METADATA_ITEM_KEY) {
9400                         u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
9401                                 found_key.offset : root->fs_info->nodesize;
9402
9403                         ret = btrfs_update_block_group(root, bytenr,
9404                                                        bytes, 0, 0);
9405                         if (ret)
9406                                 break;
9407                 }
9408         }
9409
9410         btrfs_release_path(path);
9411         return ret;
9412 }
9413
9414 /*
9415  * for a single backref, this will allocate a new extent
9416  * and add the backref to it.
9417  */
9418 static int record_extent(struct btrfs_trans_handle *trans,
9419                          struct btrfs_fs_info *info,
9420                          struct btrfs_path *path,
9421                          struct extent_record *rec,
9422                          struct extent_backref *back,
9423                          int allocated, u64 flags)
9424 {
9425         int ret = 0;
9426         struct btrfs_root *extent_root = info->extent_root;
9427         struct extent_buffer *leaf;
9428         struct btrfs_key ins_key;
9429         struct btrfs_extent_item *ei;
9430         struct data_backref *dback;
9431         struct btrfs_tree_block_info *bi;
9432
9433         if (!back->is_data)
9434                 rec->max_size = max_t(u64, rec->max_size,
9435                                     info->nodesize);
9436
9437         if (!allocated) {
9438                 u32 item_size = sizeof(*ei);
9439
9440                 if (!back->is_data)
9441                         item_size += sizeof(*bi);
9442
9443                 ins_key.objectid = rec->start;
9444                 ins_key.offset = rec->max_size;
9445                 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
9446
9447                 ret = btrfs_insert_empty_item(trans, extent_root, path,
9448                                         &ins_key, item_size);
9449                 if (ret)
9450                         goto fail;
9451
9452                 leaf = path->nodes[0];
9453                 ei = btrfs_item_ptr(leaf, path->slots[0],
9454                                     struct btrfs_extent_item);
9455
9456                 btrfs_set_extent_refs(leaf, ei, 0);
9457                 btrfs_set_extent_generation(leaf, ei, rec->generation);
9458
9459                 if (back->is_data) {
9460                         btrfs_set_extent_flags(leaf, ei,
9461                                                BTRFS_EXTENT_FLAG_DATA);
9462                 } else {
9463                         struct btrfs_disk_key copy_key;;
9464
9465                         bi = (struct btrfs_tree_block_info *)(ei + 1);
9466                         memset_extent_buffer(leaf, 0, (unsigned long)bi,
9467                                              sizeof(*bi));
9468
9469                         btrfs_set_disk_key_objectid(&copy_key,
9470                                                     rec->info_objectid);
9471                         btrfs_set_disk_key_type(&copy_key, 0);
9472                         btrfs_set_disk_key_offset(&copy_key, 0);
9473
9474                         btrfs_set_tree_block_level(leaf, bi, rec->info_level);
9475                         btrfs_set_tree_block_key(leaf, bi, &copy_key);
9476
9477                         btrfs_set_extent_flags(leaf, ei,
9478                                                BTRFS_EXTENT_FLAG_TREE_BLOCK | flags);
9479                 }
9480
9481                 btrfs_mark_buffer_dirty(leaf);
9482                 ret = btrfs_update_block_group(extent_root, rec->start,
9483                                                rec->max_size, 1, 0);
9484                 if (ret)
9485                         goto fail;
9486                 btrfs_release_path(path);
9487         }
9488
9489         if (back->is_data) {
9490                 u64 parent;
9491                 int i;
9492
9493                 dback = to_data_backref(back);
9494                 if (back->full_backref)
9495                         parent = dback->parent;
9496                 else
9497                         parent = 0;
9498
9499                 for (i = 0; i < dback->found_ref; i++) {
9500                         /* if parent != 0, we're doing a full backref
9501                          * passing BTRFS_FIRST_FREE_OBJECTID as the owner
9502                          * just makes the backref allocator create a data
9503                          * backref
9504                          */
9505                         ret = btrfs_inc_extent_ref(trans, info->extent_root,
9506                                                    rec->start, rec->max_size,
9507                                                    parent,
9508                                                    dback->root,
9509                                                    parent ?
9510                                                    BTRFS_FIRST_FREE_OBJECTID :
9511                                                    dback->owner,
9512                                                    dback->offset);
9513                         if (ret)
9514                                 break;
9515                 }
9516                 fprintf(stderr, "adding new data backref"
9517                                 " on %llu %s %llu owner %llu"
9518                                 " offset %llu found %d\n",
9519                                 (unsigned long long)rec->start,
9520                                 back->full_backref ?
9521                                 "parent" : "root",
9522                                 back->full_backref ?
9523                                 (unsigned long long)parent :
9524                                 (unsigned long long)dback->root,
9525                                 (unsigned long long)dback->owner,
9526                                 (unsigned long long)dback->offset,
9527                                 dback->found_ref);
9528         } else {
9529                 u64 parent;
9530                 struct tree_backref *tback;
9531
9532                 tback = to_tree_backref(back);
9533                 if (back->full_backref)
9534                         parent = tback->parent;
9535                 else
9536                         parent = 0;
9537
9538                 ret = btrfs_inc_extent_ref(trans, info->extent_root,
9539                                            rec->start, rec->max_size,
9540                                            parent, tback->root, 0, 0);
9541                 fprintf(stderr, "adding new tree backref on "
9542                         "start %llu len %llu parent %llu root %llu\n",
9543                         rec->start, rec->max_size, parent, tback->root);
9544         }
9545 fail:
9546         btrfs_release_path(path);
9547         return ret;
9548 }
9549
9550 static struct extent_entry *find_entry(struct list_head *entries,
9551                                        u64 bytenr, u64 bytes)
9552 {
9553         struct extent_entry *entry = NULL;
9554
9555         list_for_each_entry(entry, entries, list) {
9556                 if (entry->bytenr == bytenr && entry->bytes == bytes)
9557                         return entry;
9558         }
9559
9560         return NULL;
9561 }
9562
/*
 * Pick the entry from @entries that should be trusted, judged by count.
 *
 * Entries whose broken counter equals their count are skipped.  The
 * remaining entries are walked in list order while tracking the current
 * candidate ('best') and the previously examined entry ('prev').
 *
 * Returns the chosen entry, or NULL when the list yields no usable entry
 * or the walk ends without a clear winner (for example after a tie on
 * count).
 */
static struct extent_entry *find_most_right_entry(struct list_head *entries)
{
        struct extent_entry *entry, *best = NULL, *prev = NULL;

        list_for_each_entry(entry, entries, list) {
                /*
                 * If there are as many broken entries as entries then we know
                 * not to trust this particular entry.
                 */
                if (entry->broken == entry->count)
                        continue;

                /*
                 * First usable entry: it becomes both the candidate and the
                 * reference for the next comparison.  If no other usable
                 * entry follows, it is the result.
                 */
                if (!prev) {
                        best = entry;
                        prev = entry;
                        continue;
                }

                /*
                 * If our current entry == best then we can't be sure our best
                 * is really the best, so we need to keep searching.
                 */
                if (best && best->count == entry->count) {
                        prev = entry;
                        best = NULL;
                        continue;
                }

                /*
                 * Prev == entry, not good enough, have to keep searching.
                 * Note that prev is deliberately left untouched here.
                 */
                if (!prev->broken && prev->count == entry->count)
                        continue;

                /*
                 * Without a candidate, take whichever of prev and entry has
                 * the larger count (entry when they are equal); with one,
                 * replace it only when entry has a strictly larger count.
                 */
                if (!best)
                        best = (prev->count > entry->count) ? prev : entry;
                else if (best->count < entry->count)
                        best = entry;
                prev = entry;
        }

        return best;
}
9608
9609 static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path,
9610                       struct data_backref *dback, struct extent_entry *entry)
9611 {
9612         struct btrfs_trans_handle *trans;
9613         struct btrfs_root *root;
9614         struct btrfs_file_extent_item *fi;
9615         struct extent_buffer *leaf;
9616         struct btrfs_key key;
9617         u64 bytenr, bytes;
9618         int ret, err;
9619
9620         key.objectid = dback->root;
9621         key.type = BTRFS_ROOT_ITEM_KEY;
9622         key.offset = (u64)-1;
9623         root = btrfs_read_fs_root(info, &key);
9624         if (IS_ERR(root)) {
9625                 fprintf(stderr, "Couldn't find root for our ref\n");
9626                 return -EINVAL;
9627         }
9628
9629         /*
9630          * The backref points to the original offset of the extent if it was
9631          * split, so we need to search down to the offset we have and then walk
9632          * forward until we find the backref we're looking for.
9633          */
9634         key.objectid = dback->owner;
9635         key.type = BTRFS_EXTENT_DATA_KEY;
9636         key.offset = dback->offset;
9637         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
9638         if (ret < 0) {
9639                 fprintf(stderr, "Error looking up ref %d\n", ret);
9640                 return ret;
9641         }
9642
9643         while (1) {
9644                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
9645                         ret = btrfs_next_leaf(root, path);
9646                         if (ret) {
9647                                 fprintf(stderr, "Couldn't find our ref, next\n");
9648                                 return -EINVAL;
9649                         }
9650                 }
9651                 leaf = path->nodes[0];
9652                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
9653                 if (key.objectid != dback->owner ||
9654                     key.type != BTRFS_EXTENT_DATA_KEY) {
9655                         fprintf(stderr, "Couldn't find our ref, search\n");
9656                         return -EINVAL;
9657                 }
9658                 fi = btrfs_item_ptr(leaf, path->slots[0],
9659                                     struct btrfs_file_extent_item);
9660                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9661                 bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
9662
9663                 if (bytenr == dback->disk_bytenr && bytes == dback->bytes)
9664                         break;
9665                 path->slots[0]++;
9666         }
9667
9668         btrfs_release_path(path);
9669
9670         trans = btrfs_start_transaction(root, 1);
9671         if (IS_ERR(trans))
9672                 return PTR_ERR(trans);
9673
9674         /*
9675          * Ok we have the key of the file extent we want to fix, now we can cow
9676          * down to the thing and fix it.
9677          */
9678         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
9679         if (ret < 0) {
9680                 fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n",
9681                         key.objectid, key.type, key.offset, ret);
9682                 goto out;
9683         }
9684         if (ret > 0) {
9685                 fprintf(stderr, "Well that's odd, we just found this key "
9686                         "[%Lu, %u, %Lu]\n", key.objectid, key.type,
9687                         key.offset);
9688                 ret = -EINVAL;
9689                 goto out;
9690         }
9691         leaf = path->nodes[0];
9692         fi = btrfs_item_ptr(leaf, path->slots[0],
9693                             struct btrfs_file_extent_item);
9694
9695         if (btrfs_file_extent_compression(leaf, fi) &&
9696             dback->disk_bytenr != entry->bytenr) {
9697                 fprintf(stderr, "Ref doesn't match the record start and is "
9698                         "compressed, please take a btrfs-image of this file "
9699                         "system and send it to a btrfs developer so they can "
9700                         "complete this functionality for bytenr %Lu\n",
9701                         dback->disk_bytenr);
9702                 ret = -EINVAL;
9703                 goto out;
9704         }
9705
9706         if (dback->node.broken && dback->disk_bytenr != entry->bytenr) {
9707                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9708         } else if (dback->disk_bytenr > entry->bytenr) {
9709                 u64 off_diff, offset;
9710
9711                 off_diff = dback->disk_bytenr - entry->bytenr;
9712                 offset = btrfs_file_extent_offset(leaf, fi);
9713                 if (dback->disk_bytenr + offset +
9714                     btrfs_file_extent_num_bytes(leaf, fi) >
9715                     entry->bytenr + entry->bytes) {
9716                         fprintf(stderr, "Ref is past the entry end, please "
9717                                 "take a btrfs-image of this file system and "
9718                                 "send it to a btrfs developer, ref %Lu\n",
9719                                 dback->disk_bytenr);
9720                         ret = -EINVAL;
9721                         goto out;
9722                 }
9723                 offset += off_diff;
9724                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9725                 btrfs_set_file_extent_offset(leaf, fi, offset);
9726         } else if (dback->disk_bytenr < entry->bytenr) {
9727                 u64 offset;
9728
9729                 offset = btrfs_file_extent_offset(leaf, fi);
9730                 if (dback->disk_bytenr + offset < entry->bytenr) {
9731                         fprintf(stderr, "Ref is before the entry start, please"
9732                                 " take a btrfs-image of this file system and "
9733                                 "send it to a btrfs developer, ref %Lu\n",
9734                                 dback->disk_bytenr);
9735                         ret = -EINVAL;
9736                         goto out;
9737                 }
9738
9739                 offset += dback->disk_bytenr;
9740                 offset -= entry->bytenr;
9741                 btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr);
9742                 btrfs_set_file_extent_offset(leaf, fi, offset);
9743         }
9744
9745         btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes);
9746
9747         /*
9748          * Chances are if disk_num_bytes were wrong then so is ram_bytes, but
9749          * only do this if we aren't using compression, otherwise it's a
9750          * trickier case.
9751          */
9752         if (!btrfs_file_extent_compression(leaf, fi))
9753                 btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes);
9754         else
9755                 printf("ram bytes may be wrong?\n");
9756         btrfs_mark_buffer_dirty(leaf);
9757 out:
9758         err = btrfs_commit_transaction(trans, root);
9759         btrfs_release_path(path);
9760         return ret ? ret : err;
9761 }
9762
9763 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
9764                            struct extent_record *rec)
9765 {
9766         struct extent_backref *back, *tmp;
9767         struct data_backref *dback;
9768         struct extent_entry *entry, *best = NULL;
9769         LIST_HEAD(entries);
9770         int nr_entries = 0;
9771         int broken_entries = 0;
9772         int ret = 0;
9773         short mismatch = 0;
9774
9775         /*
9776          * Metadata is easy and the backrefs should always agree on bytenr and
9777          * size, if not we've got bigger issues.
9778          */
9779         if (rec->metadata)
9780                 return 0;
9781
9782         rbtree_postorder_for_each_entry_safe(back, tmp,
9783                                              &rec->backref_tree, node) {
9784                 if (back->full_backref || !back->is_data)
9785                         continue;
9786
9787                 dback = to_data_backref(back);
9788
9789                 /*
9790                  * We only pay attention to backrefs that we found a real
9791                  * backref for.
9792                  */
9793                 if (dback->found_ref == 0)
9794                         continue;
9795
9796                 /*
9797                  * For now we only catch when the bytes don't match, not the
9798                  * bytenr.  We can easily do this at the same time, but I want
9799                  * to have a fs image to test on before we just add repair
9800                  * functionality willy-nilly so we know we won't screw up the
9801                  * repair.
9802                  */
9803
9804                 entry = find_entry(&entries, dback->disk_bytenr,
9805                                    dback->bytes);
9806                 if (!entry) {
9807                         entry = malloc(sizeof(struct extent_entry));
9808                         if (!entry) {
9809                                 ret = -ENOMEM;
9810                                 goto out;
9811                         }
9812                         memset(entry, 0, sizeof(*entry));
9813                         entry->bytenr = dback->disk_bytenr;
9814                         entry->bytes = dback->bytes;
9815                         list_add_tail(&entry->list, &entries);
9816                         nr_entries++;
9817                 }
9818
9819                 /*
9820                  * If we only have one entry we may think the entries agree when
9821                  * in reality they don't so we have to do some extra checking.
9822                  */
9823                 if (dback->disk_bytenr != rec->start ||
9824                     dback->bytes != rec->nr || back->broken)
9825                         mismatch = 1;
9826
9827                 if (back->broken) {
9828                         entry->broken++;
9829                         broken_entries++;
9830                 }
9831
9832                 entry->count++;
9833         }
9834
9835         /* Yay all the backrefs agree, carry on good sir */
9836         if (nr_entries <= 1 && !mismatch)
9837                 goto out;
9838
9839         fprintf(stderr, "attempting to repair backref discrepency for bytenr "
9840                 "%Lu\n", rec->start);
9841
9842         /*
9843          * First we want to see if the backrefs can agree amongst themselves who
9844          * is right, so figure out which one of the entries has the highest
9845          * count.
9846          */
9847         best = find_most_right_entry(&entries);
9848
9849         /*
9850          * Ok so we may have an even split between what the backrefs think, so
9851          * this is where we use the extent ref to see what it thinks.
9852          */
9853         if (!best) {
9854                 entry = find_entry(&entries, rec->start, rec->nr);
9855                 if (!entry && (!broken_entries || !rec->found_rec)) {
9856                         fprintf(stderr, "Backrefs don't agree with each other "
9857                                 "and extent record doesn't agree with anybody,"
9858                                 " so we can't fix bytenr %Lu bytes %Lu\n",
9859                                 rec->start, rec->nr);
9860                         ret = -EINVAL;
9861                         goto out;
9862                 } else if (!entry) {
9863                         /*
9864                          * Ok our backrefs were broken, we'll assume this is the
9865                          * correct value and add an entry for this range.
9866                          */
9867                         entry = malloc(sizeof(struct extent_entry));
9868                         if (!entry) {
9869                                 ret = -ENOMEM;
9870                                 goto out;
9871                         }
9872                         memset(entry, 0, sizeof(*entry));
9873                         entry->bytenr = rec->start;
9874                         entry->bytes = rec->nr;
9875                         list_add_tail(&entry->list, &entries);
9876                         nr_entries++;
9877                 }
9878                 entry->count++;
9879                 best = find_most_right_entry(&entries);
9880                 if (!best) {
9881                         fprintf(stderr, "Backrefs and extent record evenly "
9882                                 "split on who is right, this is going to "
9883                                 "require user input to fix bytenr %Lu bytes "
9884                                 "%Lu\n", rec->start, rec->nr);
9885                         ret = -EINVAL;
9886                         goto out;
9887                 }
9888         }
9889
9890         /*
9891          * I don't think this can happen currently as we'll abort() if we catch
9892          * this case higher up, but in case somebody removes that we still can't
9893          * deal with it properly here yet, so just bail out if that's the case.
9894          */
9895         if (best->bytenr != rec->start) {
9896                 fprintf(stderr, "Extent start and backref starts don't match, "
9897                         "please use btrfs-image on this file system and send "
9898                         "it to a btrfs developer so they can make fsck fix "
9899                         "this particular case.  bytenr is %Lu, bytes is %Lu\n",
9900                         rec->start, rec->nr);
9901                 ret = -EINVAL;
9902                 goto out;
9903         }
9904
9905         /*
9906          * Ok great we all agreed on an extent record, let's go find the real
9907          * references and fix up the ones that don't match.
9908          */
9909         rbtree_postorder_for_each_entry_safe(back, tmp,
9910                                              &rec->backref_tree, node) {
9911                 if (back->full_backref || !back->is_data)
9912                         continue;
9913
9914                 dback = to_data_backref(back);
9915
9916                 /*
9917                  * Still ignoring backrefs that don't have a real ref attached
9918                  * to them.
9919                  */
9920                 if (dback->found_ref == 0)
9921                         continue;
9922
9923                 if (dback->bytes == best->bytes &&
9924                     dback->disk_bytenr == best->bytenr)
9925                         continue;
9926
9927                 ret = repair_ref(info, path, dback, best);
9928                 if (ret)
9929                         goto out;
9930         }
9931
9932         /*
9933          * Ok we messed with the actual refs, which means we need to drop our
9934          * entire cache and go back and rescan.  I know this is a huge pain and
9935          * adds a lot of extra work, but it's the only way to be safe.  Once all
9936          * the backrefs agree we may not need to do anything to the extent
9937          * record itself.
9938          */
9939         ret = -EAGAIN;
9940 out:
9941         while (!list_empty(&entries)) {
9942                 entry = list_entry(entries.next, struct extent_entry, list);
9943                 list_del_init(&entry->list);
9944                 free(entry);
9945         }
9946         return ret;
9947 }
9948
9949 static int process_duplicates(struct cache_tree *extent_cache,
9950                               struct extent_record *rec)
9951 {
9952         struct extent_record *good, *tmp;
9953         struct cache_extent *cache;
9954         int ret;
9955
9956         /*
9957          * If we found a extent record for this extent then return, or if we
9958          * have more than one duplicate we are likely going to need to delete
9959          * something.
9960          */
9961         if (rec->found_rec || rec->num_duplicates > 1)
9962                 return 0;
9963
9964         /* Shouldn't happen but just in case */
9965         BUG_ON(!rec->num_duplicates);
9966
9967         /*
9968          * So this happens if we end up with a backref that doesn't match the
9969          * actual extent entry.  So either the backref is bad or the extent
9970          * entry is bad.  Either way we want to have the extent_record actually
9971          * reflect what we found in the extent_tree, so we need to take the
9972          * duplicate out and use that as the extent_record since the only way we
9973          * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY.
9974          */
9975         remove_cache_extent(extent_cache, &rec->cache);
9976
9977         good = to_extent_record(rec->dups.next);
9978         list_del_init(&good->list);
9979         INIT_LIST_HEAD(&good->backrefs);
9980         INIT_LIST_HEAD(&good->dups);
9981         good->cache.start = good->start;
9982         good->cache.size = good->nr;
9983         good->content_checked = 0;
9984         good->owner_ref_checked = 0;
9985         good->num_duplicates = 0;
9986         good->refs = rec->refs;
9987         list_splice_init(&rec->backrefs, &good->backrefs);
9988         while (1) {
9989                 cache = lookup_cache_extent(extent_cache, good->start,
9990                                             good->nr);
9991                 if (!cache)
9992                         break;
9993                 tmp = container_of(cache, struct extent_record, cache);
9994
9995                 /*
9996                  * If we find another overlapping extent and it's found_rec is
9997                  * set then it's a duplicate and we need to try and delete
9998                  * something.
9999                  */
10000                 if (tmp->found_rec || tmp->num_duplicates > 0) {
10001                         if (list_empty(&good->list))
10002                                 list_add_tail(&good->list,
10003                                               &duplicate_extents);
10004                         good->num_duplicates += tmp->num_duplicates + 1;
10005                         list_splice_init(&tmp->dups, &good->dups);
10006                         list_del_init(&tmp->list);
10007                         list_add_tail(&tmp->list, &good->dups);
10008                         remove_cache_extent(extent_cache, &tmp->cache);
10009                         continue;
10010                 }
10011
10012                 /*
10013                  * Ok we have another non extent item backed extent rec, so lets
10014                  * just add it to this extent and carry on like we did above.
10015                  */
10016                 good->refs += tmp->refs;
10017                 list_splice_init(&tmp->backrefs, &good->backrefs);
10018                 remove_cache_extent(extent_cache, &tmp->cache);
10019                 free(tmp);
10020         }
10021         ret = insert_cache_extent(extent_cache, &good->cache);
10022         BUG_ON(ret);
10023         free(rec);
10024         return good->num_duplicates ? 0 : 1;
10025 }
10026
10027 static int delete_duplicate_records(struct btrfs_root *root,
10028                                     struct extent_record *rec)
10029 {
10030         struct btrfs_trans_handle *trans;
10031         LIST_HEAD(delete_list);
10032         struct btrfs_path path;
10033         struct extent_record *tmp, *good, *n;
10034         int nr_del = 0;
10035         int ret = 0, err;
10036         struct btrfs_key key;
10037
10038         btrfs_init_path(&path);
10039
10040         good = rec;
10041         /* Find the record that covers all of the duplicates. */
10042         list_for_each_entry(tmp, &rec->dups, list) {
10043                 if (good->start < tmp->start)
10044                         continue;
10045                 if (good->nr > tmp->nr)
10046                         continue;
10047
10048                 if (tmp->start + tmp->nr < good->start + good->nr) {
10049                         fprintf(stderr, "Ok we have overlapping extents that "
10050                                 "aren't completely covered by each other, this "
10051                                 "is going to require more careful thought.  "
10052                                 "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n",
10053                                 tmp->start, tmp->nr, good->start, good->nr);
10054                         abort();
10055                 }
10056                 good = tmp;
10057         }
10058
10059         if (good != rec)
10060                 list_add_tail(&rec->list, &delete_list);
10061
10062         list_for_each_entry_safe(tmp, n, &rec->dups, list) {
10063                 if (tmp == good)
10064                         continue;
10065                 list_move_tail(&tmp->list, &delete_list);
10066         }
10067
10068         root = root->fs_info->extent_root;
10069         trans = btrfs_start_transaction(root, 1);
10070         if (IS_ERR(trans)) {
10071                 ret = PTR_ERR(trans);
10072                 goto out;
10073         }
10074
10075         list_for_each_entry(tmp, &delete_list, list) {
10076                 if (tmp->found_rec == 0)
10077                         continue;
10078                 key.objectid = tmp->start;
10079                 key.type = BTRFS_EXTENT_ITEM_KEY;
10080                 key.offset = tmp->nr;
10081
10082                 /* Shouldn't happen but just in case */
10083                 if (tmp->metadata) {
10084                         fprintf(stderr, "Well this shouldn't happen, extent "
10085                                 "record overlaps but is metadata? "
10086                                 "[%Lu, %Lu]\n", tmp->start, tmp->nr);
10087                         abort();
10088                 }
10089
10090                 ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
10091                 if (ret) {
10092                         if (ret > 0)
10093                                 ret = -EINVAL;
10094                         break;
10095                 }
10096                 ret = btrfs_del_item(trans, root, &path);
10097                 if (ret)
10098                         break;
10099                 btrfs_release_path(&path);
10100                 nr_del++;
10101         }
10102         err = btrfs_commit_transaction(trans, root);
10103         if (err && !ret)
10104                 ret = err;
10105 out:
10106         while (!list_empty(&delete_list)) {
10107                 tmp = to_extent_record(delete_list.next);
10108                 list_del_init(&tmp->list);
10109                 if (tmp == rec)
10110                         continue;
10111                 free(tmp);
10112         }
10113
10114         while (!list_empty(&rec->dups)) {
10115                 tmp = to_extent_record(rec->dups.next);
10116                 list_del_init(&tmp->list);
10117                 free(tmp);
10118         }
10119
10120         btrfs_release_path(&path);
10121
10122         if (!ret && !nr_del)
10123                 rec->num_duplicates = 0;
10124
10125         return ret ? ret : nr_del;
10126 }
10127
10128 static int find_possible_backrefs(struct btrfs_fs_info *info,
10129                                   struct btrfs_path *path,
10130                                   struct cache_tree *extent_cache,
10131                                   struct extent_record *rec)
10132 {
10133         struct btrfs_root *root;
10134         struct extent_backref *back, *tmp;
10135         struct data_backref *dback;
10136         struct cache_extent *cache;
10137         struct btrfs_file_extent_item *fi;
10138         struct btrfs_key key;
10139         u64 bytenr, bytes;
10140         int ret;
10141
10142         rbtree_postorder_for_each_entry_safe(back, tmp,
10143                                              &rec->backref_tree, node) {
10144                 /* Don't care about full backrefs (poor unloved backrefs) */
10145                 if (back->full_backref || !back->is_data)
10146                         continue;
10147
10148                 dback = to_data_backref(back);
10149
10150                 /* We found this one, we don't need to do a lookup */
10151                 if (dback->found_ref)
10152                         continue;
10153
10154                 key.objectid = dback->root;
10155                 key.type = BTRFS_ROOT_ITEM_KEY;
10156                 key.offset = (u64)-1;
10157
10158                 root = btrfs_read_fs_root(info, &key);
10159
10160                 /* No root, definitely a bad ref, skip */
10161                 if (IS_ERR(root) && PTR_ERR(root) == -ENOENT)
10162                         continue;
10163                 /* Other err, exit */
10164                 if (IS_ERR(root))
10165                         return PTR_ERR(root);
10166
10167                 key.objectid = dback->owner;
10168                 key.type = BTRFS_EXTENT_DATA_KEY;
10169                 key.offset = dback->offset;
10170                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
10171                 if (ret) {
10172                         btrfs_release_path(path);
10173                         if (ret < 0)
10174                                 return ret;
10175                         /* Didn't find it, we can carry on */
10176                         ret = 0;
10177                         continue;
10178                 }
10179
10180                 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
10181                                     struct btrfs_file_extent_item);
10182                 bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi);
10183                 bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi);
10184                 btrfs_release_path(path);
10185                 cache = lookup_cache_extent(extent_cache, bytenr, 1);
10186                 if (cache) {
10187                         struct extent_record *tmp;
10188                         tmp = container_of(cache, struct extent_record, cache);
10189
10190                         /*
10191                          * If we found an extent record for the bytenr for this
10192                          * particular backref then we can't add it to our
10193                          * current extent record.  We only want to add backrefs
10194                          * that don't have a corresponding extent item in the
10195                          * extent tree since they likely belong to this record
10196                          * and we need to fix it if it doesn't match bytenrs.
10197                          */
10198                         if  (tmp->found_rec)
10199                                 continue;
10200                 }
10201
10202                 dback->found_ref += 1;
10203                 dback->disk_bytenr = bytenr;
10204                 dback->bytes = bytes;
10205
10206                 /*
10207                  * Set this so the verify backref code knows not to trust the
10208                  * values in this backref.
10209                  */
10210                 back->broken = 1;
10211         }
10212
10213         return 0;
10214 }
10215
10216 /*
10217  * Record orphan data ref into corresponding root.
10218  *
10219  * Return 0 if the extent item contains data ref and recorded.
10220  * Return 1 if the extent item contains no useful data ref
10221  *   On that case, it may contains only shared_dataref or metadata backref
10222  *   or the file extent exists(this should be handled by the extent bytenr
10223  *   recovery routine)
10224  * Return <0 if something goes wrong.
10225  */
10226 static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
10227                                       struct extent_record *rec)
10228 {
10229         struct btrfs_key key;
10230         struct btrfs_root *dest_root;
10231         struct extent_backref *back, *tmp;
10232         struct data_backref *dback;
10233         struct orphan_data_extent *orphan;
10234         struct btrfs_path path;
10235         int recorded_data_ref = 0;
10236         int ret = 0;
10237
10238         if (rec->metadata)
10239                 return 1;
10240         btrfs_init_path(&path);
10241         rbtree_postorder_for_each_entry_safe(back, tmp,
10242                                              &rec->backref_tree, node) {
10243                 if (back->full_backref || !back->is_data ||
10244                     !back->found_extent_tree)
10245                         continue;
10246                 dback = to_data_backref(back);
10247                 if (dback->found_ref)
10248                         continue;
10249                 key.objectid = dback->root;
10250                 key.type = BTRFS_ROOT_ITEM_KEY;
10251                 key.offset = (u64)-1;
10252
10253                 dest_root = btrfs_read_fs_root(fs_info, &key);
10254
10255                 /* For non-exist root we just skip it */
10256                 if (IS_ERR(dest_root) || !dest_root)
10257                         continue;
10258
10259                 key.objectid = dback->owner;
10260                 key.type = BTRFS_EXTENT_DATA_KEY;
10261                 key.offset = dback->offset;
10262
10263                 ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0);
10264                 btrfs_release_path(&path);
10265                 /*
10266                  * For ret < 0, it's OK since the fs-tree may be corrupted,
10267                  * we need to record it for inode/file extent rebuild.
10268                  * For ret > 0, we record it only for file extent rebuild.
10269                  * For ret == 0, the file extent exists but only bytenr
10270                  * mismatch, let the original bytenr fix routine to handle,
10271                  * don't record it.
10272                  */
10273                 if (ret == 0)
10274                         continue;
10275                 ret = 0;
10276                 orphan = malloc(sizeof(*orphan));
10277                 if (!orphan) {
10278                         ret = -ENOMEM;
10279                         goto out;
10280                 }
10281                 INIT_LIST_HEAD(&orphan->list);
10282                 orphan->root = dback->root;
10283                 orphan->objectid = dback->owner;
10284                 orphan->offset = dback->offset;
10285                 orphan->disk_bytenr = rec->cache.start;
10286                 orphan->disk_len = rec->cache.size;
10287                 list_add(&dest_root->orphan_data_extents, &orphan->list);
10288                 recorded_data_ref = 1;
10289         }
10290 out:
10291         btrfs_release_path(&path);
10292         if (!ret)
10293                 return !recorded_data_ref;
10294         else
10295                 return ret;
10296 }
10297
10298 /*
10299  * when an incorrect extent item is found, this will delete
10300  * all of the existing entries for it and recreate them
10301  * based on what the tree scan found.
10302  */
10303 static int fixup_extent_refs(struct btrfs_fs_info *info,
10304                              struct cache_tree *extent_cache,
10305                              struct extent_record *rec)
10306 {
10307         struct btrfs_trans_handle *trans = NULL;
10308         int ret;
10309         struct btrfs_path path;
10310         struct cache_extent *cache;
10311         struct extent_backref *back, *tmp;
10312         int allocated = 0;
10313         u64 flags = 0;
10314
10315         if (rec->flag_block_full_backref)
10316                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10317
10318         btrfs_init_path(&path);
10319         if (rec->refs != rec->extent_item_refs && !rec->metadata) {
10320                 /*
10321                  * Sometimes the backrefs themselves are so broken they don't
10322                  * get attached to any meaningful rec, so first go back and
10323                  * check any of our backrefs that we couldn't find and throw
10324                  * them into the list if we find the backref so that
10325                  * verify_backrefs can figure out what to do.
10326                  */
10327                 ret = find_possible_backrefs(info, &path, extent_cache, rec);
10328                 if (ret < 0)
10329                         goto out;
10330         }
10331
10332         /* step one, make sure all of the backrefs agree */
10333         ret = verify_backrefs(info, &path, rec);
10334         if (ret < 0)
10335                 goto out;
10336
10337         trans = btrfs_start_transaction(info->extent_root, 1);
10338         if (IS_ERR(trans)) {
10339                 ret = PTR_ERR(trans);
10340                 goto out;
10341         }
10342
10343         /* step two, delete all the existing records */
10344         ret = delete_extent_records(trans, info->extent_root, &path,
10345                                     rec->start);
10346
10347         if (ret < 0)
10348                 goto out;
10349
10350         /* was this block corrupt?  If so, don't add references to it */
10351         cache = lookup_cache_extent(info->corrupt_blocks,
10352                                     rec->start, rec->max_size);
10353         if (cache) {
10354                 ret = 0;
10355                 goto out;
10356         }
10357
10358         /* step three, recreate all the refs we did find */
10359         rbtree_postorder_for_each_entry_safe(back, tmp,
10360                                              &rec->backref_tree, node) {
10361                 /*
10362                  * if we didn't find any references, don't create a
10363                  * new extent record
10364                  */
10365                 if (!back->found_ref)
10366                         continue;
10367
10368                 rec->bad_full_backref = 0;
10369                 ret = record_extent(trans, info, &path, rec, back, allocated, flags);
10370                 allocated = 1;
10371
10372                 if (ret)
10373                         goto out;
10374         }
10375 out:
10376         if (trans) {
10377                 int err = btrfs_commit_transaction(trans, info->extent_root);
10378                 if (!ret)
10379                         ret = err;
10380         }
10381
10382         if (!ret)
10383                 fprintf(stderr, "Repaired extent references for %llu\n",
10384                                 (unsigned long long)rec->start);
10385
10386         btrfs_release_path(&path);
10387         return ret;
10388 }
10389
10390 static int fixup_extent_flags(struct btrfs_fs_info *fs_info,
10391                               struct extent_record *rec)
10392 {
10393         struct btrfs_trans_handle *trans;
10394         struct btrfs_root *root = fs_info->extent_root;
10395         struct btrfs_path path;
10396         struct btrfs_extent_item *ei;
10397         struct btrfs_key key;
10398         u64 flags;
10399         int ret = 0;
10400
10401         key.objectid = rec->start;
10402         if (rec->metadata) {
10403                 key.type = BTRFS_METADATA_ITEM_KEY;
10404                 key.offset = rec->info_level;
10405         } else {
10406                 key.type = BTRFS_EXTENT_ITEM_KEY;
10407                 key.offset = rec->max_size;
10408         }
10409
10410         trans = btrfs_start_transaction(root, 0);
10411         if (IS_ERR(trans))
10412                 return PTR_ERR(trans);
10413
10414         btrfs_init_path(&path);
10415         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
10416         if (ret < 0) {
10417                 btrfs_release_path(&path);
10418                 btrfs_commit_transaction(trans, root);
10419                 return ret;
10420         } else if (ret) {
10421                 fprintf(stderr, "Didn't find extent for %llu\n",
10422                         (unsigned long long)rec->start);
10423                 btrfs_release_path(&path);
10424                 btrfs_commit_transaction(trans, root);
10425                 return -ENOENT;
10426         }
10427
10428         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
10429                             struct btrfs_extent_item);
10430         flags = btrfs_extent_flags(path.nodes[0], ei);
10431         if (rec->flag_block_full_backref) {
10432                 fprintf(stderr, "setting full backref on %llu\n",
10433                         (unsigned long long)key.objectid);
10434                 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
10435         } else {
10436                 fprintf(stderr, "clearing full backref on %llu\n",
10437                         (unsigned long long)key.objectid);
10438                 flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
10439         }
10440         btrfs_set_extent_flags(path.nodes[0], ei, flags);
10441         btrfs_mark_buffer_dirty(path.nodes[0]);
10442         btrfs_release_path(&path);
10443         ret = btrfs_commit_transaction(trans, root);
10444         if (!ret)
10445                 fprintf(stderr, "Repaired extent flags for %llu\n",
10446                                 (unsigned long long)rec->start);
10447
10448         return ret;
10449 }
10450
10451 /* right now we only prune from the extent allocation tree */
10452 static int prune_one_block(struct btrfs_trans_handle *trans,
10453                            struct btrfs_fs_info *info,
10454                            struct btrfs_corrupt_block *corrupt)
10455 {
10456         int ret;
10457         struct btrfs_path path;
10458         struct extent_buffer *eb;
10459         u64 found;
10460         int slot;
10461         int nritems;
10462         int level = corrupt->level + 1;
10463
10464         btrfs_init_path(&path);
10465 again:
10466         /* we want to stop at the parent to our busted block */
10467         path.lowest_level = level;
10468
10469         ret = btrfs_search_slot(trans, info->extent_root,
10470                                 &corrupt->key, &path, -1, 1);
10471
10472         if (ret < 0)
10473                 goto out;
10474
10475         eb = path.nodes[level];
10476         if (!eb) {
10477                 ret = -ENOENT;
10478                 goto out;
10479         }
10480
10481         /*
10482          * hopefully the search gave us the block we want to prune,
10483          * lets try that first
10484          */
10485         slot = path.slots[level];
10486         found =  btrfs_node_blockptr(eb, slot);
10487         if (found == corrupt->cache.start)
10488                 goto del_ptr;
10489
10490         nritems = btrfs_header_nritems(eb);
10491
10492         /* the search failed, lets scan this node and hope we find it */
10493         for (slot = 0; slot < nritems; slot++) {
10494                 found =  btrfs_node_blockptr(eb, slot);
10495                 if (found == corrupt->cache.start)
10496                         goto del_ptr;
10497         }
10498         /*
10499          * we couldn't find the bad block.  TODO, search all the nodes for pointers
10500          * to this block
10501          */
10502         if (eb == info->extent_root->node) {
10503                 ret = -ENOENT;
10504                 goto out;
10505         } else {
10506                 level++;
10507                 btrfs_release_path(&path);
10508                 goto again;
10509         }
10510
10511 del_ptr:
10512         printk("deleting pointer to block %Lu\n", corrupt->cache.start);
10513         ret = btrfs_del_ptr(info->extent_root, &path, level, slot);
10514
10515 out:
10516         btrfs_release_path(&path);
10517         return ret;
10518 }
10519
10520 static int prune_corrupt_blocks(struct btrfs_fs_info *info)
10521 {
10522         struct btrfs_trans_handle *trans = NULL;
10523         struct cache_extent *cache;
10524         struct btrfs_corrupt_block *corrupt;
10525
10526         while (1) {
10527                 cache = search_cache_extent(info->corrupt_blocks, 0);
10528                 if (!cache)
10529                         break;
10530                 if (!trans) {
10531                         trans = btrfs_start_transaction(info->extent_root, 1);
10532                         if (IS_ERR(trans))
10533                                 return PTR_ERR(trans);
10534                 }
10535                 corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
10536                 prune_one_block(trans, info, corrupt);
10537                 remove_cache_extent(info->corrupt_blocks, cache);
10538         }
10539         if (trans)
10540                 return btrfs_commit_transaction(trans, info->extent_root);
10541         return 0;
10542 }
10543
10544 static void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
10545 {
10546         struct btrfs_block_group_cache *cache;
10547         u64 start, end;
10548         int ret;
10549
10550         while (1) {
10551                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
10552                                             &start, &end, EXTENT_DIRTY);
10553                 if (ret)
10554                         break;
10555                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
10556         }
10557
10558         start = 0;
10559         while (1) {
10560                 cache = btrfs_lookup_first_block_group(fs_info, start);
10561                 if (!cache)
10562                         break;
10563                 if (cache->cached)
10564                         cache->cached = 0;
10565                 start = cache->key.objectid + cache->key.offset;
10566         }
10567 }
10568
10569 static int check_extent_refs(struct btrfs_root *root,
10570                              struct cache_tree *extent_cache)
10571 {
10572         struct extent_record *rec;
10573         struct cache_extent *cache;
10574         int ret = 0;
10575         int had_dups = 0;
10576         int err = 0;
10577
10578         if (repair) {
10579                 /*
10580                  * if we're doing a repair, we have to make sure
10581                  * we don't allocate from the problem extents.
10582                  * In the worst case, this will be all the
10583                  * extents in the FS
10584                  */
10585                 cache = search_cache_extent(extent_cache, 0);
10586                 while(cache) {
10587                         rec = container_of(cache, struct extent_record, cache);
10588                         set_extent_dirty(root->fs_info->excluded_extents,
10589                                          rec->start,
10590                                          rec->start + rec->max_size - 1);
10591                         cache = next_cache_extent(cache);
10592                 }
10593
10594                 /* pin down all the corrupted blocks too */
10595                 cache = search_cache_extent(root->fs_info->corrupt_blocks, 0);
10596                 while(cache) {
10597                         set_extent_dirty(root->fs_info->excluded_extents,
10598                                          cache->start,
10599                                          cache->start + cache->size - 1);
10600                         cache = next_cache_extent(cache);
10601                 }
10602                 prune_corrupt_blocks(root->fs_info);
10603                 reset_cached_block_groups(root->fs_info);
10604         }
10605
10606         reset_cached_block_groups(root->fs_info);
10607
10608         /*
10609          * We need to delete any duplicate entries we find first otherwise we
10610          * could mess up the extent tree when we have backrefs that actually
10611          * belong to a different extent item and not the weird duplicate one.
10612          */
10613         while (repair && !list_empty(&duplicate_extents)) {
10614                 rec = to_extent_record(duplicate_extents.next);
10615                 list_del_init(&rec->list);
10616
10617                 /* Sometimes we can find a backref before we find an actual
10618                  * extent, so we need to process it a little bit to see if there
10619                  * truly are multiple EXTENT_ITEM_KEY's for the same range, or
10620                  * if this is a backref screwup.  If we need to delete stuff
10621                  * process_duplicates() will return 0, otherwise it will return
10622                  * 1 and we
10623                  */
10624                 if (process_duplicates(extent_cache, rec))
10625                         continue;
10626                 ret = delete_duplicate_records(root, rec);
10627                 if (ret < 0)
10628                         return ret;
10629                 /*
10630                  * delete_duplicate_records will return the number of entries
10631                  * deleted, so if it's greater than 0 then we know we actually
10632                  * did something and we need to remove.
10633                  */
10634                 if (ret)
10635                         had_dups = 1;
10636         }
10637
10638         if (had_dups)
10639                 return -EAGAIN;
10640
10641         while(1) {
10642                 int cur_err = 0;
10643                 int fix = 0;
10644
10645                 cache = search_cache_extent(extent_cache, 0);
10646                 if (!cache)
10647                         break;
10648                 rec = container_of(cache, struct extent_record, cache);
10649                 if (rec->num_duplicates) {
10650                         fprintf(stderr, "extent item %llu has multiple extent "
10651                                 "items\n", (unsigned long long)rec->start);
10652                         cur_err = 1;
10653                 }
10654
10655                 if (rec->refs != rec->extent_item_refs) {
10656                         fprintf(stderr, "ref mismatch on [%llu %llu] ",
10657                                 (unsigned long long)rec->start,
10658                                 (unsigned long long)rec->nr);
10659                         fprintf(stderr, "extent item %llu, found %llu\n",
10660                                 (unsigned long long)rec->extent_item_refs,
10661                                 (unsigned long long)rec->refs);
10662                         ret = record_orphan_data_extents(root->fs_info, rec);
10663                         if (ret < 0)
10664                                 goto repair_abort;
10665                         fix = ret;
10666                         cur_err = 1;
10667                 }
10668                 if (all_backpointers_checked(rec, 1)) {
10669                         fprintf(stderr, "backpointer mismatch on [%llu %llu]\n",
10670                                 (unsigned long long)rec->start,
10671                                 (unsigned long long)rec->nr);
10672                         fix = 1;
10673                         cur_err = 1;
10674                 }
10675                 if (!rec->owner_ref_checked) {
10676                         fprintf(stderr, "owner ref check failed [%llu %llu]\n",
10677                                 (unsigned long long)rec->start,
10678                                 (unsigned long long)rec->nr);
10679                         fix = 1;
10680                         cur_err = 1;
10681                 }
10682
10683                 if (repair && fix) {
10684                         ret = fixup_extent_refs(root->fs_info, extent_cache, rec);
10685                         if (ret)
10686                                 goto repair_abort;
10687                 }
10688
10689
10690                 if (rec->bad_full_backref) {
10691                         fprintf(stderr, "bad full backref, on [%llu]\n",
10692                                 (unsigned long long)rec->start);
10693                         if (repair) {
10694                                 ret = fixup_extent_flags(root->fs_info, rec);
10695                                 if (ret)
10696                                         goto repair_abort;
10697                                 fix = 1;
10698                         }
10699                         cur_err = 1;
10700                 }
10701                 /*
10702                  * Although it's not a extent ref's problem, we reuse this
10703                  * routine for error reporting.
10704                  * No repair function yet.
10705                  */
10706                 if (rec->crossing_stripes) {
10707                         fprintf(stderr,
10708                                 "bad metadata [%llu, %llu) crossing stripe boundary\n",
10709                                 rec->start, rec->start + rec->max_size);
10710                         cur_err = 1;
10711                 }
10712
10713                 if (rec->wrong_chunk_type) {
10714                         fprintf(stderr,
10715                                 "bad extent [%llu, %llu), type mismatch with chunk\n",
10716                                 rec->start, rec->start + rec->max_size);
10717                         cur_err = 1;
10718                 }
10719
10720                 err = cur_err;
10721                 remove_cache_extent(extent_cache, cache);
10722                 free_all_extent_backrefs(rec);
10723                 if (!init_extent_tree && repair && (!cur_err || fix))
10724                         clear_extent_dirty(root->fs_info->excluded_extents,
10725                                            rec->start,
10726                                            rec->start + rec->max_size - 1);
10727                 free(rec);
10728         }
10729 repair_abort:
10730         if (repair) {
10731                 if (ret && ret != -EAGAIN) {
10732                         fprintf(stderr, "failed to repair damaged filesystem, aborting\n");
10733                         exit(1);
10734                 } else if (!ret) {
10735                         struct btrfs_trans_handle *trans;
10736
10737                         root = root->fs_info->extent_root;
10738                         trans = btrfs_start_transaction(root, 1);
10739                         if (IS_ERR(trans)) {
10740                                 ret = PTR_ERR(trans);
10741                                 goto repair_abort;
10742                         }
10743
10744                         ret = btrfs_fix_block_accounting(trans, root);
10745                         if (ret)
10746                                 goto repair_abort;
10747                         ret = btrfs_commit_transaction(trans, root);
10748                         if (ret)
10749                                 goto repair_abort;
10750                 }
10751                 return ret;
10752         }
10753
10754         if (err)
10755                 err = -EIO;
10756         return err;
10757 }
10758
10759 u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
10760 {
10761         u64 stripe_size;
10762
10763         if (type & BTRFS_BLOCK_GROUP_RAID0) {
10764                 stripe_size = length;
10765                 stripe_size /= num_stripes;
10766         } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
10767                 stripe_size = length * 2;
10768                 stripe_size /= num_stripes;
10769         } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
10770                 stripe_size = length;
10771                 stripe_size /= (num_stripes - 1);
10772         } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
10773                 stripe_size = length;
10774                 stripe_size /= (num_stripes - 2);
10775         } else {
10776                 stripe_size = length;
10777         }
10778         return stripe_size;
10779 }
10780
10781 /*
10782  * Check the chunk with its block group/dev list ref:
10783  * Return 0 if all refs seems valid.
10784  * Return 1 if part of refs seems valid, need later check for rebuild ref
10785  * like missing block group and needs to search extent tree to rebuild them.
10786  * Return -1 if essential refs are missing and unable to rebuild.
10787  */
10788 static int check_chunk_refs(struct chunk_record *chunk_rec,
10789                             struct block_group_tree *block_group_cache,
10790                             struct device_extent_tree *dev_extent_cache,
10791                             int silent)
10792 {
10793         struct cache_extent *block_group_item;
10794         struct block_group_record *block_group_rec;
10795         struct cache_extent *dev_extent_item;
10796         struct device_extent_record *dev_extent_rec;
10797         u64 devid;
10798         u64 offset;
10799         u64 length;
10800         int metadump_v2 = 0;
10801         int i;
10802         int ret = 0;
10803
10804         block_group_item = lookup_cache_extent(&block_group_cache->tree,
10805                                                chunk_rec->offset,
10806                                                chunk_rec->length);
10807         if (block_group_item) {
10808                 block_group_rec = container_of(block_group_item,
10809                                                struct block_group_record,
10810                                                cache);
10811                 if (chunk_rec->length != block_group_rec->offset ||
10812                     chunk_rec->offset != block_group_rec->objectid ||
10813                     (!metadump_v2 &&
10814                      chunk_rec->type_flags != block_group_rec->flags)) {
10815                         if (!silent)
10816                                 fprintf(stderr,
10817                                         "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n",
10818                                         chunk_rec->objectid,
10819                                         chunk_rec->type,
10820                                         chunk_rec->offset,
10821                                         chunk_rec->length,
10822                                         chunk_rec->offset,
10823                                         chunk_rec->type_flags,
10824                                         block_group_rec->objectid,
10825                                         block_group_rec->type,
10826                                         block_group_rec->offset,
10827                                         block_group_rec->offset,
10828                                         block_group_rec->objectid,
10829                                         block_group_rec->flags);
10830                         ret = -1;
10831                 } else {
10832                         list_del_init(&block_group_rec->list);
10833                         chunk_rec->bg_rec = block_group_rec;
10834                 }
10835         } else {
10836                 if (!silent)
10837                         fprintf(stderr,
10838                                 "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n",
10839                                 chunk_rec->objectid,
10840                                 chunk_rec->type,
10841                                 chunk_rec->offset,
10842                                 chunk_rec->length,
10843                                 chunk_rec->offset,
10844                                 chunk_rec->type_flags);
10845                 ret = 1;
10846         }
10847
10848         if (metadump_v2)
10849                 return ret;
10850
10851         length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length,
10852                                     chunk_rec->num_stripes);
10853         for (i = 0; i < chunk_rec->num_stripes; ++i) {
10854                 devid = chunk_rec->stripes[i].devid;
10855                 offset = chunk_rec->stripes[i].offset;
10856                 dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree,
10857                                                        devid, offset, length);
10858                 if (dev_extent_item) {
10859                         dev_extent_rec = container_of(dev_extent_item,
10860                                                 struct device_extent_record,
10861                                                 cache);
10862                         if (dev_extent_rec->objectid != devid ||
10863                             dev_extent_rec->offset != offset ||
10864                             dev_extent_rec->chunk_offset != chunk_rec->offset ||
10865                             dev_extent_rec->length != length) {
10866                                 if (!silent)
10867                                         fprintf(stderr,
10868                                                 "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n",
10869                                                 chunk_rec->objectid,
10870                                                 chunk_rec->type,
10871                                                 chunk_rec->offset,
10872                                                 chunk_rec->stripes[i].devid,
10873                                                 chunk_rec->stripes[i].offset,
10874                                                 dev_extent_rec->objectid,
10875                                                 dev_extent_rec->offset,
10876                                                 dev_extent_rec->length);
10877                                 ret = -1;
10878                         } else {
10879                                 list_move(&dev_extent_rec->chunk_list,
10880                                           &chunk_rec->dextents);
10881                         }
10882                 } else {
10883                         if (!silent)
10884                                 fprintf(stderr,
10885                                         "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n",
10886                                         chunk_rec->objectid,
10887                                         chunk_rec->type,
10888                                         chunk_rec->offset,
10889                                         chunk_rec->stripes[i].devid,
10890                                         chunk_rec->stripes[i].offset);
10891                         ret = -1;
10892                 }
10893         }
10894         return ret;
10895 }
10896
10897 /* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */
10898 int check_chunks(struct cache_tree *chunk_cache,
10899                  struct block_group_tree *block_group_cache,
10900                  struct device_extent_tree *dev_extent_cache,
10901                  struct list_head *good, struct list_head *bad,
10902                  struct list_head *rebuild, int silent)
10903 {
10904         struct cache_extent *chunk_item;
10905         struct chunk_record *chunk_rec;
10906         struct block_group_record *bg_rec;
10907         struct device_extent_record *dext_rec;
10908         int err;
10909         int ret = 0;
10910
10911         chunk_item = first_cache_extent(chunk_cache);
10912         while (chunk_item) {
10913                 chunk_rec = container_of(chunk_item, struct chunk_record,
10914                                          cache);
10915                 err = check_chunk_refs(chunk_rec, block_group_cache,
10916                                        dev_extent_cache, silent);
10917                 if (err < 0)
10918                         ret = err;
10919                 if (err == 0 && good)
10920                         list_add_tail(&chunk_rec->list, good);
10921                 if (err > 0 && rebuild)
10922                         list_add_tail(&chunk_rec->list, rebuild);
10923                 if (err < 0 && bad)
10924                         list_add_tail(&chunk_rec->list, bad);
10925                 chunk_item = next_cache_extent(chunk_item);
10926         }
10927
10928         list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) {
10929                 if (!silent)
10930                         fprintf(stderr,
10931                                 "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n",
10932                                 bg_rec->objectid,
10933                                 bg_rec->offset,
10934                                 bg_rec->flags);
10935                 if (!ret)
10936                         ret = 1;
10937         }
10938
10939         list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans,
10940                             chunk_list) {
10941                 if (!silent)
10942                         fprintf(stderr,
10943                                 "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n",
10944                                 dext_rec->objectid,
10945                                 dext_rec->offset,
10946                                 dext_rec->length);
10947                 if (!ret)
10948                         ret = 1;
10949         }
10950         return ret;
10951 }
10952
10953
10954 static int check_device_used(struct device_record *dev_rec,
10955                              struct device_extent_tree *dext_cache)
10956 {
10957         struct cache_extent *cache;
10958         struct device_extent_record *dev_extent_rec;
10959         u64 total_byte = 0;
10960
10961         cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0);
10962         while (cache) {
10963                 dev_extent_rec = container_of(cache,
10964                                               struct device_extent_record,
10965                                               cache);
10966                 if (dev_extent_rec->objectid != dev_rec->devid)
10967                         break;
10968
10969                 list_del_init(&dev_extent_rec->device_list);
10970                 total_byte += dev_extent_rec->length;
10971                 cache = next_cache_extent(cache);
10972         }
10973
10974         if (total_byte != dev_rec->byte_used) {
10975                 fprintf(stderr,
10976                         "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n",
10977                         total_byte, dev_rec->byte_used, dev_rec->objectid,
10978                         dev_rec->type, dev_rec->offset);
10979                 return -1;
10980         } else {
10981                 return 0;
10982         }
10983 }
10984
10985 /*
10986  * Extra (optional) check for dev_item size to report possbile problem on a new
10987  * kernel.
10988  */
10989 static void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
10990 {
10991         if (!IS_ALIGNED(total_bytes, sectorsize)) {
10992                 warning(
10993 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
10994                         devid, total_bytes, sectorsize);
10995                 warning(
10996 "this is OK for older kernel, but may cause kernel warning for newer kernels");
10997                 warning("this can be fixed by 'btrfs rescue fix-device-size'");
10998         }
10999 }
11000
11001 /*
11002  * Unlike device size alignment check above, some super total_bytes check
11003  * failure can lead to mount failure for newer kernel.
11004  *
11005  * So this function will return the error for a fatal super total_bytes problem.
11006  */
11007 static bool is_super_size_valid(struct btrfs_fs_info *fs_info)
11008 {
11009         struct btrfs_device *dev;
11010         struct list_head *dev_list = &fs_info->fs_devices->devices;
11011         u64 total_bytes = 0;
11012         u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy);
11013
11014         list_for_each_entry(dev, dev_list, dev_list)
11015                 total_bytes += dev->total_bytes;
11016
11017         /* Important check, which can cause unmountable fs */
11018         if (super_bytes < total_bytes) {
11019                 error("super total bytes %llu smaller than real device(s) size %llu",
11020                         super_bytes, total_bytes);
11021                 error("mounting this fs may fail for newer kernels");
11022                 error("this can be fixed by 'btrfs rescue fix-device-size'");
11023                 return false;
11024         }
11025
11026         /*
11027          * Optional check, just to make everything aligned and match with each
11028          * other.
11029          *
11030          * For a btrfs-image restored fs, we don't need to check it anyway.
11031          */
11032         if (btrfs_super_flags(fs_info->super_copy) &
11033             (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2))
11034                 return true;
11035         if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) ||
11036             !IS_ALIGNED(total_bytes, fs_info->sectorsize) ||
11037             super_bytes != total_bytes) {
11038                 warning("minor unaligned/mismatch device size detected");
11039                 warning(
11040                 "recommended to use 'btrfs rescue fix-device-size' to fix it");
11041         }
11042         return true;
11043 }
11044
11045 /* check btrfs_dev_item -> btrfs_dev_extent */
11046 static int check_devices(struct rb_root *dev_cache,
11047                          struct device_extent_tree *dev_extent_cache)
11048 {
11049         struct rb_node *dev_node;
11050         struct device_record *dev_rec;
11051         struct device_extent_record *dext_rec;
11052         int err;
11053         int ret = 0;
11054
11055         dev_node = rb_first(dev_cache);
11056         while (dev_node) {
11057                 dev_rec = container_of(dev_node, struct device_record, node);
11058                 err = check_device_used(dev_rec, dev_extent_cache);
11059                 if (err)
11060                         ret = err;
11061
11062                 check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte,
11063                                          global_info->sectorsize);
11064                 dev_node = rb_next(dev_node);
11065         }
11066         list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans,
11067                             device_list) {
11068                 fprintf(stderr,
11069                         "Device extent[%llu, %llu, %llu] didn't find its device.\n",
11070                         dext_rec->objectid, dext_rec->offset, dext_rec->length);
11071                 if (!ret)
11072                         ret = 1;
11073         }
11074         return ret;
11075 }
11076
11077 static int add_root_item_to_list(struct list_head *head,
11078                                   u64 objectid, u64 bytenr, u64 last_snapshot,
11079                                   u8 level, u8 drop_level,
11080                                   struct btrfs_key *drop_key)
11081 {
11082
11083         struct root_item_record *ri_rec;
11084         ri_rec = malloc(sizeof(*ri_rec));
11085         if (!ri_rec)
11086                 return -ENOMEM;
11087         ri_rec->bytenr = bytenr;
11088         ri_rec->objectid = objectid;
11089         ri_rec->level = level;
11090         ri_rec->drop_level = drop_level;
11091         ri_rec->last_snapshot = last_snapshot;
11092         if (drop_key)
11093                 memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key));
11094         list_add_tail(&ri_rec->list, head);
11095
11096         return 0;
11097 }
11098
11099 static void free_root_item_list(struct list_head *list)
11100 {
11101         struct root_item_record *ri_rec;
11102
11103         while (!list_empty(list)) {
11104                 ri_rec = list_first_entry(list, struct root_item_record,
11105                                           list);
11106                 list_del_init(&ri_rec->list);
11107                 free(ri_rec);
11108         }
11109 }
11110
11111 static int deal_root_from_list(struct list_head *list,
11112                                struct btrfs_root *root,
11113                                struct block_info *bits,
11114                                int bits_nr,
11115                                struct cache_tree *pending,
11116                                struct cache_tree *seen,
11117                                struct cache_tree *reada,
11118                                struct cache_tree *nodes,
11119                                struct cache_tree *extent_cache,
11120                                struct cache_tree *chunk_cache,
11121                                struct rb_root *dev_cache,
11122                                struct block_group_tree *block_group_cache,
11123                                struct device_extent_tree *dev_extent_cache)
11124 {
11125         int ret = 0;
11126         u64 last;
11127
11128         while (!list_empty(list)) {
11129                 struct root_item_record *rec;
11130                 struct extent_buffer *buf;
11131                 rec = list_entry(list->next,
11132                                  struct root_item_record, list);
11133                 last = 0;
11134                 buf = read_tree_block(root->fs_info, rec->bytenr, 0);
11135                 if (!extent_buffer_uptodate(buf)) {
11136                         free_extent_buffer(buf);
11137                         ret = -EIO;
11138                         break;
11139                 }
11140                 ret = add_root_to_pending(buf, extent_cache, pending,
11141                                     seen, nodes, rec->objectid);
11142                 if (ret < 0)
11143                         break;
11144                 /*
11145                  * To rebuild extent tree, we need deal with snapshot
11146                  * one by one, otherwise we deal with node firstly which
11147                  * can maximize readahead.
11148                  */
11149                 while (1) {
11150                         ret = run_next_block(root, bits, bits_nr, &last,
11151                                              pending, seen, reada, nodes,
11152                                              extent_cache, chunk_cache,
11153                                              dev_cache, block_group_cache,
11154                                              dev_extent_cache, rec);
11155                         if (ret != 0)
11156                                 break;
11157                 }
11158                 free_extent_buffer(buf);
11159                 list_del(&rec->list);
11160                 free(rec);
11161                 if (ret < 0)
11162                         break;
11163         }
11164         while (ret >= 0) {
11165                 ret = run_next_block(root, bits, bits_nr, &last, pending, seen,
11166                                      reada, nodes, extent_cache, chunk_cache,
11167                                      dev_cache, block_group_cache,
11168                                      dev_extent_cache, NULL);
11169                 if (ret != 0) {
11170                         if (ret > 0)
11171                                 ret = 0;
11172                         break;
11173                 }
11174         }
11175         return ret;
11176 }
11177
11178 static int check_chunks_and_extents(struct btrfs_fs_info *fs_info)
11179 {
11180         struct rb_root dev_cache;
11181         struct cache_tree chunk_cache;
11182         struct block_group_tree block_group_cache;
11183         struct device_extent_tree dev_extent_cache;
11184         struct cache_tree extent_cache;
11185         struct cache_tree seen;
11186         struct cache_tree pending;
11187         struct cache_tree reada;
11188         struct cache_tree nodes;
11189         struct extent_io_tree excluded_extents;
11190         struct cache_tree corrupt_blocks;
11191         struct btrfs_path path;
11192         struct btrfs_key key;
11193         struct btrfs_key found_key;
11194         int ret, err = 0;
11195         struct block_info *bits;
11196         int bits_nr;
11197         struct extent_buffer *leaf;
11198         int slot;
11199         struct btrfs_root_item ri;
11200         struct list_head dropping_trees;
11201         struct list_head normal_trees;
11202         struct btrfs_root *root1;
11203         struct btrfs_root *root;
11204         u64 objectid;
11205         u8 level;
11206
11207         root = fs_info->fs_root;
11208         dev_cache = RB_ROOT;
11209         cache_tree_init(&chunk_cache);
11210         block_group_tree_init(&block_group_cache);
11211         device_extent_tree_init(&dev_extent_cache);
11212
11213         cache_tree_init(&extent_cache);
11214         cache_tree_init(&seen);
11215         cache_tree_init(&pending);
11216         cache_tree_init(&nodes);
11217         cache_tree_init(&reada);
11218         cache_tree_init(&corrupt_blocks);
11219         extent_io_tree_init(&excluded_extents);
11220         INIT_LIST_HEAD(&dropping_trees);
11221         INIT_LIST_HEAD(&normal_trees);
11222
11223         if (repair) {
11224                 fs_info->excluded_extents = &excluded_extents;
11225                 fs_info->fsck_extent_cache = &extent_cache;
11226                 fs_info->free_extent_hook = free_extent_hook;
11227                 fs_info->corrupt_blocks = &corrupt_blocks;
11228         }
11229
11230         bits_nr = 1024;
11231         bits = malloc(bits_nr * sizeof(struct block_info));
11232         if (!bits) {
11233                 perror("malloc");
11234                 exit(1);
11235         }
11236
11237         if (ctx.progress_enabled) {
11238                 ctx.tp = TASK_EXTENTS;
11239                 task_start(ctx.info);
11240         }
11241
11242 again:
11243         root1 = fs_info->tree_root;
11244         level = btrfs_header_level(root1->node);
11245         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11246                                     root1->node->start, 0, level, 0, NULL);
11247         if (ret < 0)
11248                 goto out;
11249         root1 = fs_info->chunk_root;
11250         level = btrfs_header_level(root1->node);
11251         ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid,
11252                                     root1->node->start, 0, level, 0, NULL);
11253         if (ret < 0)
11254                 goto out;
11255         btrfs_init_path(&path);
11256         key.offset = 0;
11257         key.objectid = 0;
11258         key.type = BTRFS_ROOT_ITEM_KEY;
11259         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0);
11260         if (ret < 0)
11261                 goto out;
11262         while(1) {
11263                 leaf = path.nodes[0];
11264                 slot = path.slots[0];
11265                 if (slot >= btrfs_header_nritems(path.nodes[0])) {
11266                         ret = btrfs_next_leaf(root, &path);
11267                         if (ret != 0)
11268                                 break;
11269                         leaf = path.nodes[0];
11270                         slot = path.slots[0];
11271                 }
11272                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
11273                 if (found_key.type == BTRFS_ROOT_ITEM_KEY) {
11274                         unsigned long offset;
11275                         u64 last_snapshot;
11276
11277                         offset = btrfs_item_ptr_offset(leaf, path.slots[0]);
11278                         read_extent_buffer(leaf, &ri, offset, sizeof(ri));
11279                         last_snapshot = btrfs_root_last_snapshot(&ri);
11280                         if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) {
11281                                 level = btrfs_root_level(&ri);
11282                                 ret = add_root_item_to_list(&normal_trees,
11283                                                 found_key.objectid,
11284                                                 btrfs_root_bytenr(&ri),
11285                                                 last_snapshot, level,
11286                                                 0, NULL);
11287                                 if (ret < 0)
11288                                         goto out;
11289                         } else {
11290                                 level = btrfs_root_level(&ri);
11291                                 objectid = found_key.objectid;
11292                                 btrfs_disk_key_to_cpu(&found_key,
11293                                                       &ri.drop_progress);
11294                                 ret = add_root_item_to_list(&dropping_trees,
11295                                                 objectid,
11296                                                 btrfs_root_bytenr(&ri),
11297                                                 last_snapshot, level,
11298                                                 ri.drop_level, &found_key);
11299                                 if (ret < 0)
11300                                         goto out;
11301                         }
11302                 }
11303                 path.slots[0]++;
11304         }
11305         btrfs_release_path(&path);
11306
11307         /*
11308          * check_block can return -EAGAIN if it fixes something, please keep
11309          * this in mind when dealing with return values from these functions, if
11310          * we get -EAGAIN we want to fall through and restart the loop.
11311          */
11312         ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending,
11313                                   &seen, &reada, &nodes, &extent_cache,
11314                                   &chunk_cache, &dev_cache, &block_group_cache,
11315                                   &dev_extent_cache);
11316         if (ret < 0) {
11317                 if (ret == -EAGAIN)
11318                         goto loop;
11319                 goto out;
11320         }
11321         ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr,
11322                                   &pending, &seen, &reada, &nodes,
11323                                   &extent_cache, &chunk_cache, &dev_cache,
11324                                   &block_group_cache, &dev_extent_cache);
11325         if (ret < 0) {
11326                 if (ret == -EAGAIN)
11327                         goto loop;
11328                 goto out;
11329         }
11330
11331         ret = check_chunks(&chunk_cache, &block_group_cache,
11332                            &dev_extent_cache, NULL, NULL, NULL, 0);
11333         if (ret) {
11334                 if (ret == -EAGAIN)
11335                         goto loop;
11336                 err = ret;
11337         }
11338
11339         ret = check_extent_refs(root, &extent_cache);
11340         if (ret < 0) {
11341                 if (ret == -EAGAIN)
11342                         goto loop;
11343                 goto out;
11344         }
11345
11346         ret = check_devices(&dev_cache, &dev_extent_cache);
11347         if (ret && err)
11348                 ret = err;
11349
11350 out:
11351         task_stop(ctx.info);
11352         if (repair) {
11353                 free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11354                 extent_io_tree_cleanup(&excluded_extents);
11355                 fs_info->fsck_extent_cache = NULL;
11356                 fs_info->free_extent_hook = NULL;
11357                 fs_info->corrupt_blocks = NULL;
11358                 fs_info->excluded_extents = NULL;
11359         }
11360         free(bits);
11361         free_chunk_cache_tree(&chunk_cache);
11362         free_device_cache_tree(&dev_cache);
11363         free_block_group_tree(&block_group_cache);
11364         free_device_extent_tree(&dev_extent_cache);
11365         free_extent_cache_tree(&seen);
11366         free_extent_cache_tree(&pending);
11367         free_extent_cache_tree(&reada);
11368         free_extent_cache_tree(&nodes);
11369         free_root_item_list(&normal_trees);
11370         free_root_item_list(&dropping_trees);
11371         return ret;
11372 loop:
11373         free_corrupt_blocks_tree(fs_info->corrupt_blocks);
11374         free_extent_cache_tree(&seen);
11375         free_extent_cache_tree(&pending);
11376         free_extent_cache_tree(&reada);
11377         free_extent_cache_tree(&nodes);
11378         free_chunk_cache_tree(&chunk_cache);
11379         free_block_group_tree(&block_group_cache);
11380         free_device_cache_tree(&dev_cache);
11381         free_device_extent_tree(&dev_extent_cache);
11382         free_extent_record_cache(&extent_cache);
11383         free_root_item_list(&normal_trees);
11384         free_root_item_list(&dropping_trees);
11385         extent_io_tree_cleanup(&excluded_extents);
11386         goto again;
11387 }
11388
11389 static int check_extent_inline_ref(struct extent_buffer *eb,
11390                    struct btrfs_key *key, struct btrfs_extent_inline_ref *iref)
11391 {
11392         int ret;
11393         u8 type = btrfs_extent_inline_ref_type(eb, iref);
11394
11395         switch (type) {
11396         case BTRFS_TREE_BLOCK_REF_KEY:
11397         case BTRFS_EXTENT_DATA_REF_KEY:
11398         case BTRFS_SHARED_BLOCK_REF_KEY:
11399         case BTRFS_SHARED_DATA_REF_KEY:
11400                 ret = 0;
11401                 break;
11402         default:
11403                 error("extent[%llu %u %llu] has unknown ref type: %d",
11404                       key->objectid, key->type, key->offset, type);
11405                 ret = UNKNOWN_TYPE;
11406                 break;
11407         }
11408
11409         return ret;
11410 }
11411
11412 /*
11413  * Check backrefs of a tree block given by @bytenr or @eb.
11414  *
11415  * @root:       the root containing the @bytenr or @eb
11416  * @eb:         tree block extent buffer, can be NULL
11417  * @bytenr:     bytenr of the tree block to search
11418  * @level:      tree level of the tree block
11419  * @owner:      owner of the tree block
11420  *
11421  * Return >0 for any error found and output error message
11422  * Return 0 for no error found
11423  */
11424 static int check_tree_block_ref(struct btrfs_root *root,
11425                                 struct extent_buffer *eb, u64 bytenr,
11426                                 int level, u64 owner, struct node_refs *nrefs)
11427 {
11428         struct btrfs_key key;
11429         struct btrfs_root *extent_root = root->fs_info->extent_root;
11430         struct btrfs_path path;
11431         struct btrfs_extent_item *ei;
11432         struct btrfs_extent_inline_ref *iref;
11433         struct extent_buffer *leaf;
11434         unsigned long end;
11435         unsigned long ptr;
11436         int slot;
11437         int skinny_level;
11438         int root_level = btrfs_header_level(root->node);
11439         int type;
11440         u32 nodesize = root->fs_info->nodesize;
11441         u32 item_size;
11442         u64 offset;
11443         int found_ref = 0;
11444         int err = 0;
11445         int ret;
11446         int strict = 1;
11447         int parent = 0;
11448
11449         btrfs_init_path(&path);
11450         key.objectid = bytenr;
11451         if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
11452                 key.type = BTRFS_METADATA_ITEM_KEY;
11453         else
11454                 key.type = BTRFS_EXTENT_ITEM_KEY;
11455         key.offset = (u64)-1;
11456
11457         /* Search for the backref in extent tree */
11458         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11459         if (ret < 0) {
11460                 err |= BACKREF_MISSING;
11461                 goto out;
11462         }
11463         ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11464         if (ret) {
11465                 err |= BACKREF_MISSING;
11466                 goto out;
11467         }
11468
11469         leaf = path.nodes[0];
11470         slot = path.slots[0];
11471         btrfs_item_key_to_cpu(leaf, &key, slot);
11472
11473         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11474
11475         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11476                 skinny_level = (int)key.offset;
11477                 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11478         } else {
11479                 struct btrfs_tree_block_info *info;
11480
11481                 info = (struct btrfs_tree_block_info *)(ei + 1);
11482                 skinny_level = btrfs_tree_block_level(leaf, info);
11483                 iref = (struct btrfs_extent_inline_ref *)(info + 1);
11484         }
11485
11486
11487         if (eb) {
11488                 u64 header_gen;
11489                 u64 extent_gen;
11490
11491                 /*
11492                  * Due to the feature of shared tree blocks, if the upper node
11493                  * is a fs root or shared node, the extent of checked node may
11494                  * not be updated until the next CoW.
11495                  */
11496                 if (nrefs)
11497                         strict = should_check_extent_strictly(root, nrefs,
11498                                         level);
11499                 if (!(btrfs_extent_flags(leaf, ei) &
11500                       BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11501                         error(
11502                 "extent[%llu %u] backref type mismatch, missing bit: %llx",
11503                                 key.objectid, nodesize,
11504                                 BTRFS_EXTENT_FLAG_TREE_BLOCK);
11505                         err = BACKREF_MISMATCH;
11506                 }
11507                 header_gen = btrfs_header_generation(eb);
11508                 extent_gen = btrfs_extent_generation(leaf, ei);
11509                 if (header_gen != extent_gen) {
11510                         error(
11511         "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu",
11512                                 key.objectid, nodesize, header_gen,
11513                                 extent_gen);
11514                         err = BACKREF_MISMATCH;
11515                 }
11516                 if (level != skinny_level) {
11517                         error(
11518                         "extent[%llu %u] level mismatch, wanted: %u, have: %u",
11519                                 key.objectid, nodesize, level, skinny_level);
11520                         err = BACKREF_MISMATCH;
11521                 }
11522                 if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) {
11523                         error(
11524                         "extent[%llu %u] is referred by other roots than %llu",
11525                                 key.objectid, nodesize, root->objectid);
11526                         err = BACKREF_MISMATCH;
11527                 }
11528         }
11529
11530         /*
11531          * Iterate the extent/metadata item to find the exact backref
11532          */
11533         item_size = btrfs_item_size_nr(leaf, slot);
11534         ptr = (unsigned long)iref;
11535         end = (unsigned long)ei + item_size;
11536
11537         while (ptr < end) {
11538                 iref = (struct btrfs_extent_inline_ref *)ptr;
11539                 type = btrfs_extent_inline_ref_type(leaf, iref);
11540                 offset = btrfs_extent_inline_ref_offset(leaf, iref);
11541
11542                 ret = check_extent_inline_ref(leaf, &key, iref);
11543                 if (ret) {
11544                         err |= ret;
11545                         break;
11546                 }
11547                 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
11548                         if (offset == root->objectid)
11549                                 found_ref = 1;
11550                         if (!strict && owner == offset)
11551                                 found_ref = 1;
11552                 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
11553                         /*
11554                          * Backref of tree reloc root points to itself, no need
11555                          * to check backref any more.
11556                          *
11557                          * This may be an error of loop backref, but extent tree
11558                          * checker should have already handled it.
11559                          * Here we only need to avoid infinite iteration.
11560                          */
11561                         if (offset == bytenr) {
11562                                 found_ref = 1;
11563                         } else {
11564                                 /*
11565                                  * Check if the backref points to valid
11566                                  * referencer
11567                                  */
11568                                 found_ref = !check_tree_block_ref( root, NULL,
11569                                                 offset, level + 1, owner,
11570                                                 NULL);
11571                         }
11572                 }
11573
11574                 if (found_ref)
11575                         break;
11576                 ptr += btrfs_extent_inline_ref_size(type);
11577         }
11578
11579         /*
11580          * Inlined extent item doesn't have what we need, check
11581          * TREE_BLOCK_REF_KEY
11582          */
11583         if (!found_ref) {
11584                 btrfs_release_path(&path);
11585                 key.objectid = bytenr;
11586                 key.type = BTRFS_TREE_BLOCK_REF_KEY;
11587                 key.offset = root->objectid;
11588
11589                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11590                 if (!ret)
11591                         found_ref = 1;
11592         }
11593         /*
11594          * Finally check SHARED BLOCK REF, any found will be good
11595          * Here we're not doing comprehensive extent backref checking,
11596          * only need to ensure there is some extent referring to this
11597          * tree block.
11598          */
11599         if (!found_ref) {
11600                 btrfs_release_path(&path);
11601                 key.objectid = bytenr;
11602                 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
11603                 key.offset = (u64)-1;
11604
11605                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11606                 if (ret < 0) {
11607                         err |= BACKREF_MISSING;
11608                         goto out;
11609                 }
11610                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
11611                 if (ret) {
11612                         err |= BACKREF_MISSING;
11613                         goto out;
11614                 }
11615                 found_ref = 1;
11616         }
11617         if (!found_ref)
11618                 err |= BACKREF_MISSING;
11619 out:
11620         btrfs_release_path(&path);
11621         if (nrefs && strict &&
11622             level < root_level && nrefs->full_backref[level + 1])
11623                 parent = nrefs->bytenr[level + 1];
11624         if (eb && (err & BACKREF_MISSING))
11625                 error(
11626         "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu",
11627                       bytenr, nodesize, owner, level,
11628                       parent ? "parent" : "root",
11629                       parent ? parent : root->objectid);
11630         return err;
11631 }
11632
11633 /*
11634  * If @err contains BACKREF_MISSING then add extent of the
11635  * file_extent_data_item.
11636  *
11637  * Returns error bits after reapir.
11638  */
11639 static int repair_extent_data_item(struct btrfs_trans_handle *trans,
11640                                    struct btrfs_root *root,
11641                                    struct btrfs_path *pathp,
11642                                    struct node_refs *nrefs,
11643                                    int err)
11644 {
11645         struct btrfs_file_extent_item *fi;
11646         struct btrfs_key fi_key;
11647         struct btrfs_key key;
11648         struct btrfs_extent_item *ei;
11649         struct btrfs_path path;
11650         struct btrfs_root *extent_root = root->fs_info->extent_root;
11651         struct extent_buffer *eb;
11652         u64 size;
11653         u64 disk_bytenr;
11654         u64 num_bytes;
11655         u64 parent;
11656         u64 offset;
11657         u64 extent_offset;
11658         u64 file_offset;
11659         int generation;
11660         int slot;
11661         int ret = 0;
11662
11663         eb = pathp->nodes[0];
11664         slot = pathp->slots[0];
11665         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11666         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11667
11668         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11669             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11670                 return err;
11671
11672         file_offset = fi_key.offset;
11673         generation = btrfs_file_extent_generation(eb, fi);
11674         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11675         num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11676         extent_offset = btrfs_file_extent_offset(eb, fi);
11677         offset = file_offset - extent_offset;
11678
11679         /* now repair only adds backref */
11680         if ((err & BACKREF_MISSING) == 0)
11681                 return err;
11682
11683         /* search extent item */
11684         key.objectid = disk_bytenr;
11685         key.type = BTRFS_EXTENT_ITEM_KEY;
11686         key.offset = num_bytes;
11687
11688         btrfs_init_path(&path);
11689         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
11690         if (ret < 0) {
11691                 ret = -EIO;
11692                 goto out;
11693         }
11694
11695         /* insert an extent item */
11696         if (ret > 0) {
11697                 key.objectid = disk_bytenr;
11698                 key.type = BTRFS_EXTENT_ITEM_KEY;
11699                 key.offset = num_bytes;
11700                 size = sizeof(*ei);
11701
11702                 btrfs_release_path(&path);
11703                 ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
11704                                               size);
11705                 if (ret)
11706                         goto out;
11707                 eb = path.nodes[0];
11708                 ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
11709
11710                 btrfs_set_extent_refs(eb, ei, 0);
11711                 btrfs_set_extent_generation(eb, ei, generation);
11712                 btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA);
11713
11714                 btrfs_mark_buffer_dirty(eb);
11715                 ret = btrfs_update_block_group(extent_root, disk_bytenr,
11716                                                num_bytes, 1, 0);
11717                 btrfs_release_path(&path);
11718         }
11719
11720         if (nrefs->full_backref[0])
11721                 parent = btrfs_header_bytenr(eb);
11722         else
11723                 parent = 0;
11724
11725         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
11726                                    root->objectid,
11727                    parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
11728                                    offset);
11729         if (ret) {
11730                 error(
11731                 "failed to increase extent data backref[%llu %llu] root %llu",
11732                       disk_bytenr, num_bytes, root->objectid);
11733                 goto out;
11734         } else {
11735                 printf("Add one extent data backref [%llu %llu]\n",
11736                        disk_bytenr, num_bytes);
11737         }
11738
11739         err &= ~BACKREF_MISSING;
11740 out:
11741         if (ret)
11742                 error("can't repair root %llu extent data item[%llu %llu]",
11743                       root->objectid, disk_bytenr, num_bytes);
11744         return err;
11745 }
11746
11747 /*
11748  * Check EXTENT_DATA item, mainly for its dbackref in extent tree
11749  *
11750  * Return >0 any error found and output error message
11751  * Return 0 for no error found
11752  */
11753 static int check_extent_data_item(struct btrfs_root *root,
11754                                   struct btrfs_path *pathp,
11755                                   struct node_refs *nrefs,  int account_bytes)
11756 {
11757         struct btrfs_file_extent_item *fi;
11758         struct extent_buffer *eb = pathp->nodes[0];
11759         struct btrfs_path path;
11760         struct btrfs_root *extent_root = root->fs_info->extent_root;
11761         struct btrfs_key fi_key;
11762         struct btrfs_key dbref_key;
11763         struct extent_buffer *leaf;
11764         struct btrfs_extent_item *ei;
11765         struct btrfs_extent_inline_ref *iref;
11766         struct btrfs_extent_data_ref *dref;
11767         u64 owner;
11768         u64 disk_bytenr;
11769         u64 disk_num_bytes;
11770         u64 extent_num_bytes;
11771         u64 extent_flags;
11772         u64 offset;
11773         u32 item_size;
11774         unsigned long end;
11775         unsigned long ptr;
11776         int type;
11777         int found_dbackref = 0;
11778         int slot = pathp->slots[0];
11779         int err = 0;
11780         int ret;
11781         int strict;
11782
11783         btrfs_item_key_to_cpu(eb, &fi_key, slot);
11784         fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
11785
11786         /* Nothing to check for hole and inline data extents */
11787         if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
11788             btrfs_file_extent_disk_bytenr(eb, fi) == 0)
11789                 return 0;
11790
11791         disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
11792         disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
11793         extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
11794         offset = btrfs_file_extent_offset(eb, fi);
11795
11796         /* Check unaligned disk_num_bytes and num_bytes */
11797         if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
11798                 error(
11799 "file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
11800                         fi_key.objectid, fi_key.offset, disk_num_bytes,
11801                         root->fs_info->sectorsize);
11802                 err |= BYTES_UNALIGNED;
11803         } else if (account_bytes) {
11804                 data_bytes_allocated += disk_num_bytes;
11805         }
11806         if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
11807                 error(
11808 "file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
11809                         fi_key.objectid, fi_key.offset, extent_num_bytes,
11810                         root->fs_info->sectorsize);
11811                 err |= BYTES_UNALIGNED;
11812         } else if (account_bytes) {
11813                 data_bytes_referenced += extent_num_bytes;
11814         }
11815         owner = btrfs_header_owner(eb);
11816
11817         /* Check the extent item of the file extent in extent tree */
11818         btrfs_init_path(&path);
11819         dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11820         dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
11821         dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
11822
11823         ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
11824         if (ret)
11825                 goto out;
11826
11827         leaf = path.nodes[0];
11828         slot = path.slots[0];
11829         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
11830
11831         extent_flags = btrfs_extent_flags(leaf, ei);
11832
11833         if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
11834                 error(
11835                     "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
11836                     disk_bytenr, disk_num_bytes,
11837                     BTRFS_EXTENT_FLAG_DATA);
11838                 err |= BACKREF_MISMATCH;
11839         }
11840
11841         /* Check data backref inside that extent item */
11842         item_size = btrfs_item_size_nr(leaf, path.slots[0]);
11843         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
11844         ptr = (unsigned long)iref;
11845         end = (unsigned long)ei + item_size;
11846         strict = should_check_extent_strictly(root, nrefs, -1);
11847
11848         while (ptr < end) {
11849                 u64 ref_root;
11850                 u64 ref_objectid;
11851                 u64 ref_offset;
11852                 bool match = false;
11853
11854                 iref = (struct btrfs_extent_inline_ref *)ptr;
11855                 type = btrfs_extent_inline_ref_type(leaf, iref);
11856                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11857
11858                 ret = check_extent_inline_ref(leaf, &dbref_key, iref);
11859                 if (ret) {
11860                         err |= ret;
11861                         break;
11862                 }
11863                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11864                         ref_root = btrfs_extent_data_ref_root(leaf, dref);
11865                         ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
11866                         ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
11867
11868                         if (ref_objectid == fi_key.objectid &&
11869                             ref_offset == fi_key.offset - offset)
11870                                 match = true;
11871                         if (ref_root == root->objectid && match)
11872                                 found_dbackref = 1;
11873                         else if (!strict && owner == ref_root && match)
11874                                 found_dbackref = 1;
11875                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11876                         found_dbackref = !check_tree_block_ref(root, NULL,
11877                                 btrfs_extent_inline_ref_offset(leaf, iref),
11878                                 0, owner, NULL);
11879                 }
11880
11881                 if (found_dbackref)
11882                         break;
11883                 ptr += btrfs_extent_inline_ref_size(type);
11884         }
11885
11886         if (!found_dbackref) {
11887                 btrfs_release_path(&path);
11888
11889                 /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
11890                 dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
11891                 dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
11892                 dbref_key.offset = hash_extent_data_ref(root->objectid,
11893                                 fi_key.objectid, fi_key.offset - offset);
11894
11895                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11896                                         &dbref_key, &path, 0, 0);
11897                 if (!ret) {
11898                         found_dbackref = 1;
11899                         goto out;
11900                 }
11901
11902                 btrfs_release_path(&path);
11903
11904                 /*
11905                  * Neither inlined nor EXTENT_DATA_REF found, try
11906                  * SHARED_DATA_REF as last chance.
11907                  */
11908                 dbref_key.objectid = disk_bytenr;
11909                 dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
11910                 dbref_key.offset = eb->start;
11911
11912                 ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
11913                                         &dbref_key, &path, 0, 0);
11914                 if (!ret) {
11915                         found_dbackref = 1;
11916                         goto out;
11917                 }
11918         }
11919
11920 out:
11921         if (!found_dbackref)
11922                 err |= BACKREF_MISSING;
11923         btrfs_release_path(&path);
11924         if (err & BACKREF_MISSING) {
11925                 error("data extent[%llu %llu] backref lost",
11926                       disk_bytenr, disk_num_bytes);
11927         }
11928         return err;
11929 }
11930
11931 /*
11932  * Get real tree block level for the case like shared block
11933  * Return >= 0 as tree level
11934  * Return <0 for error
11935  */
11936 static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
11937 {
11938         struct extent_buffer *eb;
11939         struct btrfs_path path;
11940         struct btrfs_key key;
11941         struct btrfs_extent_item *ei;
11942         u64 flags;
11943         u64 transid;
11944         u8 backref_level;
11945         u8 header_level;
11946         int ret;
11947
11948         /* Search extent tree for extent generation and level */
11949         key.objectid = bytenr;
11950         key.type = BTRFS_METADATA_ITEM_KEY;
11951         key.offset = (u64)-1;
11952
11953         btrfs_init_path(&path);
11954         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
11955         if (ret < 0)
11956                 goto release_out;
11957         ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
11958         if (ret < 0)
11959                 goto release_out;
11960         if (ret > 0) {
11961                 ret = -ENOENT;
11962                 goto release_out;
11963         }
11964
11965         btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
11966         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
11967                             struct btrfs_extent_item);
11968         flags = btrfs_extent_flags(path.nodes[0], ei);
11969         if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
11970                 ret = -ENOENT;
11971                 goto release_out;
11972         }
11973
11974         /* Get transid for later read_tree_block() check */
11975         transid = btrfs_extent_generation(path.nodes[0], ei);
11976
11977         /* Get backref level as one source */
11978         if (key.type == BTRFS_METADATA_ITEM_KEY) {
11979                 backref_level = key.offset;
11980         } else {
11981                 struct btrfs_tree_block_info *info;
11982
11983                 info = (struct btrfs_tree_block_info *)(ei + 1);
11984                 backref_level = btrfs_tree_block_level(path.nodes[0], info);
11985         }
11986         btrfs_release_path(&path);
11987
11988         /* Get level from tree block as an alternative source */
11989         eb = read_tree_block(fs_info, bytenr, transid);
11990         if (!extent_buffer_uptodate(eb)) {
11991                 free_extent_buffer(eb);
11992                 return -EIO;
11993         }
11994         header_level = btrfs_header_level(eb);
11995         free_extent_buffer(eb);
11996
11997         if (header_level != backref_level)
11998                 return -EIO;
11999         return header_level;
12000
12001 release_out:
12002         btrfs_release_path(&path);
12003         return ret;
12004 }
12005
12006 /*
12007  * Check if a tree block backref is valid (points to a valid tree block)
12008  * if level == -1, level will be resolved
12009  * Return >0 for any error found and print error message
12010  */
12011 static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
12012                                     u64 bytenr, int level)
12013 {
12014         struct btrfs_root *root;
12015         struct btrfs_key key;
12016         struct btrfs_path path;
12017         struct extent_buffer *eb;
12018         struct extent_buffer *node;
12019         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12020         int err = 0;
12021         int ret;
12022
12023         /* Query level for level == -1 special case */
12024         if (level == -1)
12025                 level = query_tree_block_level(fs_info, bytenr);
12026         if (level < 0) {
12027                 err |= REFERENCER_MISSING;
12028                 goto out;
12029         }
12030
12031         key.objectid = root_id;
12032         key.type = BTRFS_ROOT_ITEM_KEY;
12033         key.offset = (u64)-1;
12034
12035         root = btrfs_read_fs_root(fs_info, &key);
12036         if (IS_ERR(root)) {
12037                 err |= REFERENCER_MISSING;
12038                 goto out;
12039         }
12040
12041         /* Read out the tree block to get item/node key */
12042         eb = read_tree_block(fs_info, bytenr, 0);
12043         if (!extent_buffer_uptodate(eb)) {
12044                 err |= REFERENCER_MISSING;
12045                 free_extent_buffer(eb);
12046                 goto out;
12047         }
12048
12049         /* Empty tree, no need to check key */
12050         if (!btrfs_header_nritems(eb) && !level) {
12051                 free_extent_buffer(eb);
12052                 goto out;
12053         }
12054
12055         if (level)
12056                 btrfs_node_key_to_cpu(eb, &key, 0);
12057         else
12058                 btrfs_item_key_to_cpu(eb, &key, 0);
12059
12060         free_extent_buffer(eb);
12061
12062         btrfs_init_path(&path);
12063         path.lowest_level = level;
12064         /* Search with the first key, to ensure we can reach it */
12065         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12066         if (ret < 0) {
12067                 err |= REFERENCER_MISSING;
12068                 goto release_out;
12069         }
12070
12071         node = path.nodes[level];
12072         if (btrfs_header_bytenr(node) != bytenr) {
12073                 error(
12074         "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
12075                         bytenr, nodesize, bytenr,
12076                         btrfs_header_bytenr(node));
12077                 err |= REFERENCER_MISMATCH;
12078         }
12079         if (btrfs_header_level(node) != level) {
12080                 error(
12081         "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
12082                         bytenr, nodesize, level,
12083                         btrfs_header_level(node));
12084                 err |= REFERENCER_MISMATCH;
12085         }
12086
12087 release_out:
12088         btrfs_release_path(&path);
12089 out:
12090         if (err & REFERENCER_MISSING) {
12091                 if (level < 0)
12092                         error("extent [%llu %d] lost referencer (owner: %llu)",
12093                                 bytenr, nodesize, root_id);
12094                 else
12095                         error(
12096                 "extent [%llu %d] lost referencer (owner: %llu, level: %u)",
12097                                 bytenr, nodesize, root_id, level);
12098         }
12099
12100         return err;
12101 }
12102
12103 /*
12104  * Check if tree block @eb is tree reloc root.
12105  * Return 0 if it's not or any problem happens
12106  * Return 1 if it's a tree reloc root
12107  */
12108 static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
12109                                  struct extent_buffer *eb)
12110 {
12111         struct btrfs_root *tree_reloc_root;
12112         struct btrfs_key key;
12113         u64 bytenr = btrfs_header_bytenr(eb);
12114         u64 owner = btrfs_header_owner(eb);
12115         int ret = 0;
12116
12117         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
12118         key.offset = owner;
12119         key.type = BTRFS_ROOT_ITEM_KEY;
12120
12121         tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
12122         if (IS_ERR(tree_reloc_root))
12123                 return 0;
12124
12125         if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
12126                 ret = 1;
12127         btrfs_free_fs_root(tree_reloc_root);
12128         return ret;
12129 }
12130
12131 /*
12132  * Check referencer for shared block backref
12133  * If level == -1, this function will resolve the level.
12134  */
12135 static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
12136                                      u64 parent, u64 bytenr, int level)
12137 {
12138         struct extent_buffer *eb;
12139         u32 nr;
12140         int found_parent = 0;
12141         int i;
12142
12143         eb = read_tree_block(fs_info, parent, 0);
12144         if (!extent_buffer_uptodate(eb))
12145                 goto out;
12146
12147         if (level == -1)
12148                 level = query_tree_block_level(fs_info, bytenr);
12149         if (level < 0)
12150                 goto out;
12151
12152         /* It's possible it's a tree reloc root */
12153         if (parent == bytenr) {
12154                 if (is_tree_reloc_root(fs_info, eb))
12155                         found_parent = 1;
12156                 goto out;
12157         }
12158
12159         if (level + 1 != btrfs_header_level(eb))
12160                 goto out;
12161
12162         nr = btrfs_header_nritems(eb);
12163         for (i = 0; i < nr; i++) {
12164                 if (bytenr == btrfs_node_blockptr(eb, i)) {
12165                         found_parent = 1;
12166                         break;
12167                 }
12168         }
12169 out:
12170         free_extent_buffer(eb);
12171         if (!found_parent) {
12172                 error(
12173         "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
12174                         bytenr, fs_info->nodesize, parent, level);
12175                 return REFERENCER_MISSING;
12176         }
12177         return 0;
12178 }
12179
12180 /*
12181  * Check referencer for normal (inlined) data ref
12182  * If len == 0, it will be resolved by searching in extent tree
12183  */
12184 static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
12185                                      u64 root_id, u64 objectid, u64 offset,
12186                                      u64 bytenr, u64 len, u32 count)
12187 {
12188         struct btrfs_root *root;
12189         struct btrfs_root *extent_root = fs_info->extent_root;
12190         struct btrfs_key key;
12191         struct btrfs_path path;
12192         struct extent_buffer *leaf;
12193         struct btrfs_file_extent_item *fi;
12194         u32 found_count = 0;
12195         int slot;
12196         int ret = 0;
12197
12198         if (!len) {
12199                 key.objectid = bytenr;
12200                 key.type = BTRFS_EXTENT_ITEM_KEY;
12201                 key.offset = (u64)-1;
12202
12203                 btrfs_init_path(&path);
12204                 ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
12205                 if (ret < 0)
12206                         goto out;
12207                 ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
12208                 if (ret)
12209                         goto out;
12210                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12211                 if (key.objectid != bytenr ||
12212                     key.type != BTRFS_EXTENT_ITEM_KEY)
12213                         goto out;
12214                 len = key.offset;
12215                 btrfs_release_path(&path);
12216         }
12217         key.objectid = root_id;
12218         key.type = BTRFS_ROOT_ITEM_KEY;
12219         key.offset = (u64)-1;
12220         btrfs_init_path(&path);
12221
12222         root = btrfs_read_fs_root(fs_info, &key);
12223         if (IS_ERR(root))
12224                 goto out;
12225
12226         key.objectid = objectid;
12227         key.type = BTRFS_EXTENT_DATA_KEY;
12228         /*
12229          * It can be nasty as data backref offset is
12230          * file offset - file extent offset, which is smaller or
12231          * equal to original backref offset.  The only special case is
12232          * overflow.  So we need to special check and do further search.
12233          */
12234         key.offset = offset & (1ULL << 63) ? 0 : offset;
12235
12236         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
12237         if (ret < 0)
12238                 goto out;
12239
12240         /*
12241          * Search afterwards to get correct one
12242          * NOTE: As we must do a comprehensive check on the data backref to
12243          * make sure the dref count also matches, we must iterate all file
12244          * extents for that inode.
12245          */
12246         while (1) {
12247                 leaf = path.nodes[0];
12248                 slot = path.slots[0];
12249
12250                 if (slot >= btrfs_header_nritems(leaf) ||
12251                     btrfs_header_owner(leaf) != root_id)
12252                         goto next;
12253                 btrfs_item_key_to_cpu(leaf, &key, slot);
12254                 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
12255                         break;
12256                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
12257                 /*
12258                  * Except normal disk bytenr and disk num bytes, we still
12259                  * need to do extra check on dbackref offset as
12260                  * dbackref offset = file_offset - file_extent_offset
12261                  *
12262                  * Also, we must check the leaf owner.
12263                  * In case of shared tree blocks (snapshots) we can inherit
12264                  * leaves from source snapshot.
12265                  * In that case, reference from source snapshot should not
12266                  * count.
12267                  */
12268                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
12269                     btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
12270                     (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
12271                     offset && btrfs_header_owner(leaf) == root_id)
12272                         found_count++;
12273
12274 next:
12275                 ret = btrfs_next_item(root, &path);
12276                 if (ret)
12277                         break;
12278         }
12279 out:
12280         btrfs_release_path(&path);
12281         if (found_count != count) {
12282                 error(
12283 "extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
12284                         bytenr, len, root_id, objectid, offset, count, found_count);
12285                 return REFERENCER_MISSING;
12286         }
12287         return 0;
12288 }
12289
12290 /*
12291  * Check if the referencer of a shared data backref exists
12292  */
12293 static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
12294                                      u64 parent, u64 bytenr)
12295 {
12296         struct extent_buffer *eb;
12297         struct btrfs_key key;
12298         struct btrfs_file_extent_item *fi;
12299         u32 nr;
12300         int found_parent = 0;
12301         int i;
12302
12303         eb = read_tree_block(fs_info, parent, 0);
12304         if (!extent_buffer_uptodate(eb))
12305                 goto out;
12306
12307         nr = btrfs_header_nritems(eb);
12308         for (i = 0; i < nr; i++) {
12309                 btrfs_item_key_to_cpu(eb, &key, i);
12310                 if (key.type != BTRFS_EXTENT_DATA_KEY)
12311                         continue;
12312
12313                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
12314                 if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
12315                         continue;
12316
12317                 if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
12318                         found_parent = 1;
12319                         break;
12320                 }
12321         }
12322
12323 out:
12324         free_extent_buffer(eb);
12325         if (!found_parent) {
12326                 error("shared extent %llu referencer lost (parent: %llu)",
12327                         bytenr, parent);
12328                 return REFERENCER_MISSING;
12329         }
12330         return 0;
12331 }
12332
12333 /*
12334  * Only delete backref if REFERENCER_MISSING now
12335  *
12336  * Returns <0   the extent was deleted
12337  * Returns >0   the backref was deleted but extent still exists, returned value
12338  *               means error after repair
12339  * Returns  0   nothing happened
12340  */
12341 static int repair_extent_item(struct btrfs_trans_handle *trans,
12342                       struct btrfs_root *root, struct btrfs_path *path,
12343                       u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
12344                       u64 owner, u64 offset, int err)
12345 {
12346         struct btrfs_key old_key;
12347         int freed = 0;
12348         int ret;
12349
12350         btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
12351
12352         if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
12353                 /* delete the backref */
12354                 ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
12355                           num_bytes, parent, root_objectid, owner, offset);
12356                 if (!ret) {
12357                         freed = 1;
12358                         err &= ~REFERENCER_MISSING;
12359                         printf("Delete backref in extent [%llu %llu]\n",
12360                                bytenr, num_bytes);
12361                 } else {
12362                         error("fail to delete backref in extent [%llu %llu]",
12363                                bytenr, num_bytes);
12364                 }
12365         }
12366
12367         /* btrfs_free_extent may delete the extent */
12368         btrfs_release_path(path);
12369         ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
12370
12371         if (ret)
12372                 ret = -ENOENT;
12373         else if (freed)
12374                 ret = err;
12375         return ret;
12376 }
12377
12378 /*
12379  * This function will check a given extent item, including its backref and
12380  * itself (like crossing stripe boundary and type)
12381  *
12382  * Since we don't use extent_record anymore, introduce new error bit
12383  */
12384 static int check_extent_item(struct btrfs_trans_handle *trans,
12385                              struct btrfs_fs_info *fs_info,
12386                              struct btrfs_path *path)
12387 {
12388         struct btrfs_extent_item *ei;
12389         struct btrfs_extent_inline_ref *iref;
12390         struct btrfs_extent_data_ref *dref;
12391         struct extent_buffer *eb = path->nodes[0];
12392         unsigned long end;
12393         unsigned long ptr;
12394         int slot = path->slots[0];
12395         int type;
12396         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12397         u32 item_size = btrfs_item_size_nr(eb, slot);
12398         u64 flags;
12399         u64 offset;
12400         u64 parent;
12401         u64 num_bytes;
12402         u64 root_objectid;
12403         u64 owner;
12404         u64 owner_offset;
12405         int metadata = 0;
12406         int level;
12407         struct btrfs_key key;
12408         int ret;
12409         int err = 0;
12410
12411         btrfs_item_key_to_cpu(eb, &key, slot);
12412         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
12413                 bytes_used += key.offset;
12414                 num_bytes = key.offset;
12415         } else {
12416                 bytes_used += nodesize;
12417                 num_bytes = nodesize;
12418         }
12419
12420         if (item_size < sizeof(*ei)) {
12421                 /*
12422                  * COMPAT_EXTENT_TREE_V0 case, but it's already a super
12423                  * old thing when on disk format is still un-determined.
12424                  * No need to care about it anymore
12425                  */
12426                 error("unsupported COMPAT_EXTENT_TREE_V0 detected");
12427                 return -ENOTTY;
12428         }
12429
12430         ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
12431         flags = btrfs_extent_flags(eb, ei);
12432
12433         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
12434                 metadata = 1;
12435         if (metadata && check_crossing_stripes(global_info, key.objectid,
12436                                                eb->len)) {
12437                 error("bad metadata [%llu, %llu) crossing stripe boundary",
12438                       key.objectid, key.objectid + nodesize);
12439                 err |= CROSSING_STRIPE_BOUNDARY;
12440         }
12441
12442         ptr = (unsigned long)(ei + 1);
12443
12444         if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
12445                 /* Old EXTENT_ITEM metadata */
12446                 struct btrfs_tree_block_info *info;
12447
12448                 info = (struct btrfs_tree_block_info *)ptr;
12449                 level = btrfs_tree_block_level(eb, info);
12450                 ptr += sizeof(struct btrfs_tree_block_info);
12451         } else {
12452                 /* New METADATA_ITEM */
12453                 level = key.offset;
12454         }
12455         end = (unsigned long)ei + item_size;
12456
12457 next:
12458         /* Reached extent item end normally */
12459         if (ptr == end)
12460                 goto out;
12461
12462         /* Beyond extent item end, wrong item size */
12463         if (ptr > end) {
12464                 err |= ITEM_SIZE_MISMATCH;
12465                 error("extent item at bytenr %llu slot %d has wrong size",
12466                         eb->start, slot);
12467                 goto out;
12468         }
12469
12470         parent = 0;
12471         root_objectid = 0;
12472         owner = 0;
12473         owner_offset = 0;
12474         /* Now check every backref in this extent item */
12475         iref = (struct btrfs_extent_inline_ref *)ptr;
12476         type = btrfs_extent_inline_ref_type(eb, iref);
12477         offset = btrfs_extent_inline_ref_offset(eb, iref);
12478         switch (type) {
12479         case BTRFS_TREE_BLOCK_REF_KEY:
12480                 root_objectid = offset;
12481                 owner = level;
12482                 ret = check_tree_block_backref(fs_info, offset, key.objectid,
12483                                                level);
12484                 err |= ret;
12485                 break;
12486         case BTRFS_SHARED_BLOCK_REF_KEY:
12487                 parent = offset;
12488                 ret = check_shared_block_backref(fs_info, offset, key.objectid,
12489                                                  level);
12490                 err |= ret;
12491                 break;
12492         case BTRFS_EXTENT_DATA_REF_KEY:
12493                 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
12494                 root_objectid = btrfs_extent_data_ref_root(eb, dref);
12495                 owner = btrfs_extent_data_ref_objectid(eb, dref);
12496                 owner_offset = btrfs_extent_data_ref_offset(eb, dref);
12497                 ret = check_extent_data_backref(fs_info, root_objectid, owner,
12498                                         owner_offset, key.objectid, key.offset,
12499                                         btrfs_extent_data_ref_count(eb, dref));
12500                 err |= ret;
12501                 break;
12502         case BTRFS_SHARED_DATA_REF_KEY:
12503                 parent = offset;
12504                 ret = check_shared_data_backref(fs_info, offset, key.objectid);
12505                 err |= ret;
12506                 break;
12507         default:
12508                 error("extent[%llu %d %llu] has unknown ref type: %d",
12509                         key.objectid, key.type, key.offset, type);
12510                 ret = UNKNOWN_TYPE;
12511                 err |= ret;
12512                 goto out;
12513         }
12514
12515         if (err && repair) {
12516                 ret = repair_extent_item(trans, fs_info->extent_root, path,
12517                          key.objectid, num_bytes, parent, root_objectid,
12518                          owner, owner_offset, ret);
12519                 if (ret < 0)
12520                         goto out;
12521                 if (ret) {
12522                         goto next;
12523                         err = ret;
12524                 }
12525         }
12526
12527         ptr += btrfs_extent_inline_ref_size(type);
12528         goto next;
12529
12530 out:
12531         return err;
12532 }
12533
12534 /*
12535  * Check if a dev extent item is referred correctly by its chunk
12536  */
12537 static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
12538                                  struct extent_buffer *eb, int slot)
12539 {
12540         struct btrfs_root *chunk_root = fs_info->chunk_root;
12541         struct btrfs_dev_extent *ptr;
12542         struct btrfs_path path;
12543         struct btrfs_key chunk_key;
12544         struct btrfs_key devext_key;
12545         struct btrfs_chunk *chunk;
12546         struct extent_buffer *l;
12547         int num_stripes;
12548         u64 length;
12549         int i;
12550         int found_chunk = 0;
12551         int ret;
12552
12553         btrfs_item_key_to_cpu(eb, &devext_key, slot);
12554         ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
12555         length = btrfs_dev_extent_length(eb, ptr);
12556
12557         chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
12558         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12559         chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
12560
12561         btrfs_init_path(&path);
12562         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12563         if (ret)
12564                 goto out;
12565
12566         l = path.nodes[0];
12567         chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
12568         ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
12569                                       chunk_key.offset);
12570         if (ret < 0)
12571                 goto out;
12572
12573         if (btrfs_stripe_length(fs_info, l, chunk) != length)
12574                 goto out;
12575
12576         num_stripes = btrfs_chunk_num_stripes(l, chunk);
12577         for (i = 0; i < num_stripes; i++) {
12578                 u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
12579                 u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
12580
12581                 if (devid == devext_key.objectid &&
12582                     offset == devext_key.offset) {
12583                         found_chunk = 1;
12584                         break;
12585                 }
12586         }
12587 out:
12588         btrfs_release_path(&path);
12589         if (!found_chunk) {
12590                 error(
12591                 "device extent[%llu, %llu, %llu] did not find the related chunk",
12592                         devext_key.objectid, devext_key.offset, length);
12593                 return REFERENCER_MISSING;
12594         }
12595         return 0;
12596 }
12597
12598 /*
12599  * Check if the used space is correct with the dev item
12600  */
12601 static int check_dev_item(struct btrfs_fs_info *fs_info,
12602                           struct extent_buffer *eb, int slot)
12603 {
12604         struct btrfs_root *dev_root = fs_info->dev_root;
12605         struct btrfs_dev_item *dev_item;
12606         struct btrfs_path path;
12607         struct btrfs_key key;
12608         struct btrfs_dev_extent *ptr;
12609         u64 total_bytes;
12610         u64 dev_id;
12611         u64 used;
12612         u64 total = 0;
12613         int ret;
12614
12615         dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
12616         dev_id = btrfs_device_id(eb, dev_item);
12617         used = btrfs_device_bytes_used(eb, dev_item);
12618         total_bytes = btrfs_device_total_bytes(eb, dev_item);
12619
12620         key.objectid = dev_id;
12621         key.type = BTRFS_DEV_EXTENT_KEY;
12622         key.offset = 0;
12623
12624         btrfs_init_path(&path);
12625         ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
12626         if (ret < 0) {
12627                 btrfs_item_key_to_cpu(eb, &key, slot);
12628                 error("cannot find any related dev extent for dev[%llu, %u, %llu]",
12629                         key.objectid, key.type, key.offset);
12630                 btrfs_release_path(&path);
12631                 return REFERENCER_MISSING;
12632         }
12633
12634         /* Iterate dev_extents to calculate the used space of a device */
12635         while (1) {
12636                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
12637                         goto next;
12638
12639                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
12640                 if (key.objectid > dev_id)
12641                         break;
12642                 if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
12643                         goto next;
12644
12645                 ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
12646                                      struct btrfs_dev_extent);
12647                 total += btrfs_dev_extent_length(path.nodes[0], ptr);
12648 next:
12649                 ret = btrfs_next_item(dev_root, &path);
12650                 if (ret)
12651                         break;
12652         }
12653         btrfs_release_path(&path);
12654
12655         if (used != total) {
12656                 btrfs_item_key_to_cpu(eb, &key, slot);
12657                 error(
12658 "Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
12659                         total, used, BTRFS_ROOT_TREE_OBJECTID,
12660                         BTRFS_DEV_EXTENT_KEY, dev_id);
12661                 return ACCOUNTING_MISMATCH;
12662         }
12663         check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
12664
12665         return 0;
12666 }
12667
12668 /*
12669  * Check a block group item with its referener (chunk) and its used space
12670  * with extent/metadata item
12671  */
12672 static int check_block_group_item(struct btrfs_fs_info *fs_info,
12673                                   struct extent_buffer *eb, int slot)
12674 {
12675         struct btrfs_root *extent_root = fs_info->extent_root;
12676         struct btrfs_root *chunk_root = fs_info->chunk_root;
12677         struct btrfs_block_group_item *bi;
12678         struct btrfs_block_group_item bg_item;
12679         struct btrfs_path path;
12680         struct btrfs_key bg_key;
12681         struct btrfs_key chunk_key;
12682         struct btrfs_key extent_key;
12683         struct btrfs_chunk *chunk;
12684         struct extent_buffer *leaf;
12685         struct btrfs_extent_item *ei;
12686         u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
12687         u64 flags;
12688         u64 bg_flags;
12689         u64 used;
12690         u64 total = 0;
12691         int ret;
12692         int err = 0;
12693
12694         btrfs_item_key_to_cpu(eb, &bg_key, slot);
12695         bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
12696         read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
12697         used = btrfs_block_group_used(&bg_item);
12698         bg_flags = btrfs_block_group_flags(&bg_item);
12699
12700         chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
12701         chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
12702         chunk_key.offset = bg_key.objectid;
12703
12704         btrfs_init_path(&path);
12705         /* Search for the referencer chunk */
12706         ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
12707         if (ret) {
12708                 error(
12709                 "block group[%llu %llu] did not find the related chunk item",
12710                         bg_key.objectid, bg_key.offset);
12711                 err |= REFERENCER_MISSING;
12712         } else {
12713                 chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
12714                                         struct btrfs_chunk);
12715                 if (btrfs_chunk_length(path.nodes[0], chunk) !=
12716                                                 bg_key.offset) {
12717                         error(
12718         "block group[%llu %llu] related chunk item length does not match",
12719                                 bg_key.objectid, bg_key.offset);
12720                         err |= REFERENCER_MISMATCH;
12721                 }
12722         }
12723         btrfs_release_path(&path);
12724
12725         /* Search from the block group bytenr */
12726         extent_key.objectid = bg_key.objectid;
12727         extent_key.type = 0;
12728         extent_key.offset = 0;
12729
12730         btrfs_init_path(&path);
12731         ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
12732         if (ret < 0)
12733                 goto out;
12734
12735         /* Iterate extent tree to account used space */
12736         while (1) {
12737                 leaf = path.nodes[0];
12738
12739                 /* Search slot can point to the last item beyond leaf nritems */
12740                 if (path.slots[0] >= btrfs_header_nritems(leaf))
12741                         goto next;
12742
12743                 btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
12744                 if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
12745                         break;
12746
12747                 if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
12748                     extent_key.type != BTRFS_EXTENT_ITEM_KEY)
12749                         goto next;
12750                 if (extent_key.objectid < bg_key.objectid)
12751                         goto next;
12752
12753                 if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
12754                         total += nodesize;
12755                 else
12756                         total += extent_key.offset;
12757
12758                 ei = btrfs_item_ptr(leaf, path.slots[0],
12759                                     struct btrfs_extent_item);
12760                 flags = btrfs_extent_flags(leaf, ei);
12761                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
12762                         if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
12763                                 error(
12764                         "bad extent[%llu, %llu) type mismatch with chunk",
12765                                         extent_key.objectid,
12766                                         extent_key.objectid + extent_key.offset);
12767                                 err |= CHUNK_TYPE_MISMATCH;
12768                         }
12769                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
12770                         if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
12771                                     BTRFS_BLOCK_GROUP_METADATA))) {
12772                                 error(
12773                         "bad extent[%llu, %llu) type mismatch with chunk",
12774                                         extent_key.objectid,
12775                                         extent_key.objectid + nodesize);
12776                                 err |= CHUNK_TYPE_MISMATCH;
12777                         }
12778                 }
12779 next:
12780                 ret = btrfs_next_item(extent_root, &path);
12781                 if (ret)
12782                         break;
12783         }
12784
12785 out:
12786         btrfs_release_path(&path);
12787
12788         if (total != used) {
12789                 error(
12790                 "block group[%llu %llu] used %llu but extent items used %llu",
12791                         bg_key.objectid, bg_key.offset, used, total);
12792                 err |= BG_ACCOUNTING_ERROR;
12793         }
12794         return err;
12795 }
12796
12797 /*
12798  * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
12799  * FIXME: We still need to repair error of dev_item.
12800  *
12801  * Returns error after repair.
12802  */
12803 static int repair_chunk_item(struct btrfs_trans_handle *trans,
12804                              struct btrfs_root *chunk_root,
12805                              struct btrfs_path *path, int err)
12806 {
12807         struct btrfs_chunk *chunk;
12808         struct btrfs_key chunk_key;
12809         struct extent_buffer *eb = path->nodes[0];
12810         u64 length;
12811         int slot = path->slots[0];
12812         u64 type;
12813         int ret = 0;
12814
12815         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12816         if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
12817                 return err;
12818         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12819         type = btrfs_chunk_type(path->nodes[0], chunk);
12820         length = btrfs_chunk_length(eb, chunk);
12821
12822         if (err & REFERENCER_MISSING) {
12823                 ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
12824                                              type, chunk_key.offset, length);
12825                 if (ret) {
12826                         error("fail to add block group item[%llu %llu]",
12827                               chunk_key.offset, length);
12828                         goto out;
12829                 } else {
12830                         err &= ~REFERENCER_MISSING;
12831                         printf("Added block group item[%llu %llu]\n",
12832                                chunk_key.offset, length);
12833                 }
12834         }
12835
12836 out:
12837         return err;
12838 }
12839
12840 /*
12841  * Check a chunk item.
12842  * Including checking all referred dev_extents and block group
12843  */
12844 static int check_chunk_item(struct btrfs_fs_info *fs_info,
12845                             struct extent_buffer *eb, int slot)
12846 {
12847         struct btrfs_root *extent_root = fs_info->extent_root;
12848         struct btrfs_root *dev_root = fs_info->dev_root;
12849         struct btrfs_path path;
12850         struct btrfs_key chunk_key;
12851         struct btrfs_key bg_key;
12852         struct btrfs_key devext_key;
12853         struct btrfs_chunk *chunk;
12854         struct extent_buffer *leaf;
12855         struct btrfs_block_group_item *bi;
12856         struct btrfs_block_group_item bg_item;
12857         struct btrfs_dev_extent *ptr;
12858         u64 length;
12859         u64 chunk_end;
12860         u64 stripe_len;
12861         u64 type;
12862         int num_stripes;
12863         u64 offset;
12864         u64 objectid;
12865         int i;
12866         int ret;
12867         int err = 0;
12868
12869         btrfs_item_key_to_cpu(eb, &chunk_key, slot);
12870         chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
12871         length = btrfs_chunk_length(eb, chunk);
12872         chunk_end = chunk_key.offset + length;
12873         ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
12874                                       chunk_key.offset);
12875         if (ret < 0) {
12876                 error("chunk[%llu %llu) is invalid", chunk_key.offset,
12877                         chunk_end);
12878                 err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
12879                 goto out;
12880         }
12881         type = btrfs_chunk_type(eb, chunk);
12882
12883         bg_key.objectid = chunk_key.offset;
12884         bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
12885         bg_key.offset = length;
12886
12887         btrfs_init_path(&path);
12888         ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
12889         if (ret) {
12890                 error(
12891                 "chunk[%llu %llu) did not find the related block group item",
12892                         chunk_key.offset, chunk_end);
12893                 err |= REFERENCER_MISSING;
12894         } else{
12895                 leaf = path.nodes[0];
12896                 bi = btrfs_item_ptr(leaf, path.slots[0],
12897                                     struct btrfs_block_group_item);
12898                 read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
12899                                    sizeof(bg_item));
12900                 if (btrfs_block_group_flags(&bg_item) != type) {
12901                         error(
12902 "chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
12903                                 chunk_key.offset, chunk_end, type,
12904                                 btrfs_block_group_flags(&bg_item));
12905                         err |= REFERENCER_MISSING;
12906                 }
12907         }
12908
12909         num_stripes = btrfs_chunk_num_stripes(eb, chunk);
12910         stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
12911         for (i = 0; i < num_stripes; i++) {
12912                 btrfs_release_path(&path);
12913                 btrfs_init_path(&path);
12914                 devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
12915                 devext_key.type = BTRFS_DEV_EXTENT_KEY;
12916                 devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
12917
12918                 ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
12919                                         0, 0);
12920                 if (ret)
12921                         goto not_match_dev;
12922
12923                 leaf = path.nodes[0];
12924                 ptr = btrfs_item_ptr(leaf, path.slots[0],
12925                                      struct btrfs_dev_extent);
12926                 objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
12927                 offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
12928                 if (objectid != chunk_key.objectid ||
12929                     offset != chunk_key.offset ||
12930                     btrfs_dev_extent_length(leaf, ptr) != stripe_len)
12931                         goto not_match_dev;
12932                 continue;
12933 not_match_dev:
12934                 err |= BACKREF_MISSING;
12935                 error(
12936                 "chunk[%llu %llu) stripe %d did not find the related dev extent",
12937                         chunk_key.objectid, chunk_end, i);
12938                 continue;
12939         }
12940         btrfs_release_path(&path);
12941 out:
12942         return err;
12943 }
12944
12945 static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
12946                                    struct btrfs_root *root,
12947                                    struct btrfs_path *path)
12948 {
12949         struct btrfs_key key;
12950         int ret = 0;
12951
12952         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
12953         btrfs_release_path(path);
12954         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
12955         if (ret) {
12956                 ret = -ENOENT;
12957                 goto out;
12958         }
12959
12960         ret = btrfs_del_item(trans, root, path);
12961         if (ret)
12962                 goto out;
12963
12964         if (path->slots[0] == 0)
12965                 btrfs_prev_leaf(root, path);
12966         else
12967                 path->slots[0]--;
12968 out:
12969         if (ret)
12970                 error("failed to delete root %llu item[%llu, %u, %llu]",
12971                       root->objectid, key.objectid, key.type, key.offset);
12972         else
12973                 printf("Deleted root %llu item[%llu, %u, %llu]\n",
12974                        root->objectid, key.objectid, key.type, key.offset);
12975         return ret;
12976 }
12977
12978 /*
12979  * Main entry function to check known items and update related accounting info
12980  */
12981 static int check_leaf_items(struct btrfs_trans_handle *trans,
12982                             struct btrfs_root *root, struct btrfs_path *path,
12983                             struct node_refs *nrefs, int account_bytes)
12984 {
12985         struct btrfs_fs_info *fs_info = root->fs_info;
12986         struct btrfs_key key;
12987         struct extent_buffer *eb;
12988         int slot;
12989         int type;
12990         struct btrfs_extent_data_ref *dref;
12991         int ret = 0;
12992         int err = 0;
12993
12994 again:
12995         eb = path->nodes[0];
12996         slot = path->slots[0];
12997         if (slot >= btrfs_header_nritems(eb)) {
12998                 if (slot == 0) {
12999                         error("empty leaf [%llu %u] root %llu", eb->start,
13000                                 root->fs_info->nodesize, root->objectid);
13001                         err |= EIO;
13002                 }
13003                 goto out;
13004         }
13005
13006         btrfs_item_key_to_cpu(eb, &key, slot);
13007         type = key.type;
13008
13009         switch (type) {
13010         case BTRFS_EXTENT_DATA_KEY:
13011                 ret = check_extent_data_item(root, path, nrefs, account_bytes);
13012                 if (repair && ret)
13013                         ret = repair_extent_data_item(trans, root, path, nrefs,
13014                                                       ret);
13015                 err |= ret;
13016                 break;
13017         case BTRFS_BLOCK_GROUP_ITEM_KEY:
13018                 ret = check_block_group_item(fs_info, eb, slot);
13019                 if (repair &&
13020                     ret & REFERENCER_MISSING)
13021                         ret = delete_extent_tree_item(trans, root, path);
13022                 err |= ret;
13023                 break;
13024         case BTRFS_DEV_ITEM_KEY:
13025                 ret = check_dev_item(fs_info, eb, slot);
13026                 err |= ret;
13027                 break;
13028         case BTRFS_CHUNK_ITEM_KEY:
13029                 ret = check_chunk_item(fs_info, eb, slot);
13030                 if (repair && ret)
13031                         ret = repair_chunk_item(trans, root, path, ret);
13032                 err |= ret;
13033                 break;
13034         case BTRFS_DEV_EXTENT_KEY:
13035                 ret = check_dev_extent_item(fs_info, eb, slot);
13036                 err |= ret;
13037                 break;
13038         case BTRFS_EXTENT_ITEM_KEY:
13039         case BTRFS_METADATA_ITEM_KEY:
13040                 ret = check_extent_item(trans, fs_info, path);
13041                 err |= ret;
13042                 break;
13043         case BTRFS_EXTENT_CSUM_KEY:
13044                 total_csum_bytes += btrfs_item_size_nr(eb, slot);
13045                 err |= ret;
13046                 break;
13047         case BTRFS_TREE_BLOCK_REF_KEY:
13048                 ret = check_tree_block_backref(fs_info, key.offset,
13049                                                key.objectid, -1);
13050                 if (repair &&
13051                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13052                         ret = delete_extent_tree_item(trans, root, path);
13053                 err |= ret;
13054                 break;
13055         case BTRFS_EXTENT_DATA_REF_KEY:
13056                 dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
13057                 ret = check_extent_data_backref(fs_info,
13058                                 btrfs_extent_data_ref_root(eb, dref),
13059                                 btrfs_extent_data_ref_objectid(eb, dref),
13060                                 btrfs_extent_data_ref_offset(eb, dref),
13061                                 key.objectid, 0,
13062                                 btrfs_extent_data_ref_count(eb, dref));
13063                 if (repair &&
13064                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13065                         ret = delete_extent_tree_item(trans, root, path);
13066                 err |= ret;
13067                 break;
13068         case BTRFS_SHARED_BLOCK_REF_KEY:
13069                 ret = check_shared_block_backref(fs_info, key.offset,
13070                                                  key.objectid, -1);
13071                 if (repair &&
13072                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13073                         ret = delete_extent_tree_item(trans, root, path);
13074                 err |= ret;
13075                 break;
13076         case BTRFS_SHARED_DATA_REF_KEY:
13077                 ret = check_shared_data_backref(fs_info, key.offset,
13078                                                 key.objectid);
13079                 if (repair &&
13080                     ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
13081                         ret = delete_extent_tree_item(trans, root, path);
13082                 err |= ret;
13083                 break;
13084         default:
13085                 break;
13086         }
13087
13088         ++path->slots[0];
13089         goto again;
13090 out:
13091         return err;
13092 }
13093
13094 /*
13095  * Low memory usage version check_chunks_and_extents.
13096  */
13097 static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
13098 {
13099         struct btrfs_trans_handle *trans = NULL;
13100         struct btrfs_path path;
13101         struct btrfs_key old_key;
13102         struct btrfs_key key;
13103         struct btrfs_root *root1;
13104         struct btrfs_root *root;
13105         struct btrfs_root *cur_root;
13106         int err = 0;
13107         int ret;
13108
13109         root = fs_info->fs_root;
13110
13111         if (repair) {
13112                 trans = btrfs_start_transaction(fs_info->extent_root, 1);
13113                 if (IS_ERR(trans)) {
13114                         error("failed to start transaction before check");
13115                         return PTR_ERR(trans);
13116                 }
13117         }
13118
13119         root1 = root->fs_info->chunk_root;
13120         ret = check_btrfs_root(trans, root1, 0, 1);
13121         err |= ret;
13122
13123         root1 = root->fs_info->tree_root;
13124         ret = check_btrfs_root(trans, root1, 0, 1);
13125         err |= ret;
13126
13127         btrfs_init_path(&path);
13128         key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
13129         key.offset = 0;
13130         key.type = BTRFS_ROOT_ITEM_KEY;
13131
13132         ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
13133         if (ret) {
13134                 error("cannot find extent tree in tree_root");
13135                 goto out;
13136         }
13137
13138         while (1) {
13139                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13140                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13141                         goto next;
13142                 old_key = key;
13143                 key.offset = (u64)-1;
13144
13145                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13146                         cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
13147                                         &key);
13148                 else
13149                         cur_root = btrfs_read_fs_root(root->fs_info, &key);
13150                 if (IS_ERR(cur_root) || !cur_root) {
13151                         error("failed to read tree: %lld", key.objectid);
13152                         goto next;
13153                 }
13154
13155                 ret = check_btrfs_root(trans, cur_root, 0, 1);
13156                 err |= ret;
13157
13158                 if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
13159                         btrfs_free_fs_root(cur_root);
13160
13161                 btrfs_release_path(&path);
13162                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
13163                                         &old_key, &path, 0, 0);
13164                 if (ret)
13165                         goto out;
13166 next:
13167                 ret = btrfs_next_item(root1, &path);
13168                 if (ret)
13169                         goto out;
13170         }
13171 out:
13172
13173         /* if repair, update block accounting */
13174         if (repair) {
13175                 ret = btrfs_fix_block_accounting(trans, root);
13176                 if (ret)
13177                         err |= ret;
13178                 else
13179                         err &= ~BG_ACCOUNTING_ERROR;
13180         }
13181
13182         if (trans)
13183                 btrfs_commit_transaction(trans, root->fs_info->extent_root);
13184
13185         btrfs_release_path(&path);
13186
13187         return err;
13188 }
13189
13190 static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info)
13191 {
13192         int ret;
13193
13194         if (!ctx.progress_enabled)
13195                 fprintf(stderr, "checking extents\n");
13196         if (check_mode == CHECK_MODE_LOWMEM)
13197                 ret = check_chunks_and_extents_v2(fs_info);
13198         else
13199                 ret = check_chunks_and_extents(fs_info);
13200
13201         /* Also repair device size related problems */
13202         if (repair && !ret) {
13203                 ret = btrfs_fix_device_and_super_size(fs_info);
13204                 if (ret > 0)
13205                         ret = 0;
13206         }
13207         return ret;
13208 }
13209
13210 static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans,
13211                            struct btrfs_root *root, int overwrite)
13212 {
13213         struct extent_buffer *c;
13214         struct extent_buffer *old = root->node;
13215         int level;
13216         int ret;
13217         struct btrfs_disk_key disk_key = {0,0,0};
13218
13219         level = 0;
13220
13221         if (overwrite) {
13222                 c = old;
13223                 extent_buffer_get(c);
13224                 goto init;
13225         }
13226         c = btrfs_alloc_free_block(trans, root,
13227                                    root->fs_info->nodesize,
13228                                    root->root_key.objectid,
13229                                    &disk_key, level, 0, 0);
13230         if (IS_ERR(c)) {
13231                 c = old;
13232                 extent_buffer_get(c);
13233                 overwrite = 1;
13234         }
13235 init:
13236         memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
13237         btrfs_set_header_level(c, level);
13238         btrfs_set_header_bytenr(c, c->start);
13239         btrfs_set_header_generation(c, trans->transid);
13240         btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
13241         btrfs_set_header_owner(c, root->root_key.objectid);
13242
13243         write_extent_buffer(c, root->fs_info->fsid,
13244                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
13245
13246         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
13247                             btrfs_header_chunk_tree_uuid(c),
13248                             BTRFS_UUID_SIZE);
13249
13250         btrfs_mark_buffer_dirty(c);
13251         /*
13252          * this case can happen in the following case:
13253          *
13254          * 1.overwrite previous root.
13255          *
13256          * 2.reinit reloc data root, this is because we skip pin
13257          * down reloc data tree before which means we can allocate
13258          * same block bytenr here.
13259          */
13260         if (old->start == c->start) {
13261                 btrfs_set_root_generation(&root->root_item,
13262                                           trans->transid);
13263                 root->root_item.level = btrfs_header_level(root->node);
13264                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
13265                                         &root->root_key, &root->root_item);
13266                 if (ret) {
13267                         free_extent_buffer(c);
13268                         return ret;
13269                 }
13270         }
13271         free_extent_buffer(old);
13272         root->node = c;
13273         add_root_to_dirty_list(root);
13274         return 0;
13275 }
13276
13277 static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info,
13278                                 struct extent_buffer *eb, int tree_root)
13279 {
13280         struct extent_buffer *tmp;
13281         struct btrfs_root_item *ri;
13282         struct btrfs_key key;
13283         u64 bytenr;
13284         int level = btrfs_header_level(eb);
13285         int nritems;
13286         int ret;
13287         int i;
13288
13289         /*
13290          * If we have pinned this block before, don't pin it again.
13291          * This can not only avoid forever loop with broken filesystem
13292          * but also give us some speedups.
13293          */
13294         if (test_range_bit(&fs_info->pinned_extents, eb->start,
13295                            eb->start + eb->len - 1, EXTENT_DIRTY, 0))
13296                 return 0;
13297
13298         btrfs_pin_extent(fs_info, eb->start, eb->len);
13299
13300         nritems = btrfs_header_nritems(eb);
13301         for (i = 0; i < nritems; i++) {
13302                 if (level == 0) {
13303                         btrfs_item_key_to_cpu(eb, &key, i);
13304                         if (key.type != BTRFS_ROOT_ITEM_KEY)
13305                                 continue;
13306                         /* Skip the extent root and reloc roots */
13307                         if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
13308                             key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
13309                             key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
13310                                 continue;
13311                         ri = btrfs_item_ptr(eb, i, struct btrfs_root_item);
13312                         bytenr = btrfs_disk_root_bytenr(eb, ri);
13313
13314                         /*
13315                          * If at any point we start needing the real root we
13316                          * will have to build a stump root for the root we are
13317                          * in, but for now this doesn't actually use the root so
13318                          * just pass in extent_root.
13319                          */
13320                         tmp = read_tree_block(fs_info, bytenr, 0);
13321                         if (!extent_buffer_uptodate(tmp)) {
13322                                 fprintf(stderr, "Error reading root block\n");
13323                                 return -EIO;
13324                         }
13325                         ret = pin_down_tree_blocks(fs_info, tmp, 0);
13326                         free_extent_buffer(tmp);
13327                         if (ret)
13328                                 return ret;
13329                 } else {
13330                         bytenr = btrfs_node_blockptr(eb, i);
13331
13332                         /* If we aren't the tree root don't read the block */
13333                         if (level == 1 && !tree_root) {
13334                                 btrfs_pin_extent(fs_info, bytenr,
13335                                                 fs_info->nodesize);
13336                                 continue;
13337                         }
13338
13339                         tmp = read_tree_block(fs_info, bytenr, 0);
13340                         if (!extent_buffer_uptodate(tmp)) {
13341                                 fprintf(stderr, "Error reading tree block\n");
13342                                 return -EIO;
13343                         }
13344                         ret = pin_down_tree_blocks(fs_info, tmp, tree_root);
13345                         free_extent_buffer(tmp);
13346                         if (ret)
13347                                 return ret;
13348                 }
13349         }
13350
13351         return 0;
13352 }
13353
13354 static int pin_metadata_blocks(struct btrfs_fs_info *fs_info)
13355 {
13356         int ret;
13357
13358         ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0);
13359         if (ret)
13360                 return ret;
13361
13362         return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1);
13363 }
13364
13365 static int reset_block_groups(struct btrfs_fs_info *fs_info)
13366 {
13367         struct btrfs_block_group_cache *cache;
13368         struct btrfs_path path;
13369         struct extent_buffer *leaf;
13370         struct btrfs_chunk *chunk;
13371         struct btrfs_key key;
13372         int ret;
13373         u64 start;
13374
13375         btrfs_init_path(&path);
13376         key.objectid = 0;
13377         key.type = BTRFS_CHUNK_ITEM_KEY;
13378         key.offset = 0;
13379         ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, &path, 0, 0);
13380         if (ret < 0) {
13381                 btrfs_release_path(&path);
13382                 return ret;
13383         }
13384
13385         /*
13386          * We do this in case the block groups were screwed up and had alloc
13387          * bits that aren't actually set on the chunks.  This happens with
13388          * restored images every time and could happen in real life I guess.
13389          */
13390         fs_info->avail_data_alloc_bits = 0;
13391         fs_info->avail_metadata_alloc_bits = 0;
13392         fs_info->avail_system_alloc_bits = 0;
13393
13394         /* First we need to create the in-memory block groups */
13395         while (1) {
13396                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13397                         ret = btrfs_next_leaf(fs_info->chunk_root, &path);
13398                         if (ret < 0) {
13399                                 btrfs_release_path(&path);
13400                                 return ret;
13401                         }
13402                         if (ret) {
13403                                 ret = 0;
13404                                 break;
13405                         }
13406                 }
13407                 leaf = path.nodes[0];
13408                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13409                 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
13410                         path.slots[0]++;
13411                         continue;
13412                 }
13413
13414                 chunk = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_chunk);
13415                 btrfs_add_block_group(fs_info, 0,
13416                                       btrfs_chunk_type(leaf, chunk), key.offset,
13417                                       btrfs_chunk_length(leaf, chunk));
13418                 set_extent_dirty(&fs_info->free_space_cache, key.offset,
13419                                  key.offset + btrfs_chunk_length(leaf, chunk));
13420                 path.slots[0]++;
13421         }
13422         start = 0;
13423         while (1) {
13424                 cache = btrfs_lookup_first_block_group(fs_info, start);
13425                 if (!cache)
13426                         break;
13427                 cache->cached = 1;
13428                 start = cache->key.objectid + cache->key.offset;
13429         }
13430
13431         btrfs_release_path(&path);
13432         return 0;
13433 }
13434
13435 static int reset_balance(struct btrfs_trans_handle *trans,
13436                          struct btrfs_fs_info *fs_info)
13437 {
13438         struct btrfs_root *root = fs_info->tree_root;
13439         struct btrfs_path path;
13440         struct extent_buffer *leaf;
13441         struct btrfs_key key;
13442         int del_slot, del_nr = 0;
13443         int ret;
13444         int found = 0;
13445
13446         btrfs_init_path(&path);
13447         key.objectid = BTRFS_BALANCE_OBJECTID;
13448         key.type = BTRFS_BALANCE_ITEM_KEY;
13449         key.offset = 0;
13450         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13451         if (ret) {
13452                 if (ret > 0)
13453                         ret = 0;
13454                 if (!ret)
13455                         goto reinit_data_reloc;
13456                 else
13457                         goto out;
13458         }
13459
13460         ret = btrfs_del_item(trans, root, &path);
13461         if (ret)
13462                 goto out;
13463         btrfs_release_path(&path);
13464
13465         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
13466         key.type = BTRFS_ROOT_ITEM_KEY;
13467         key.offset = 0;
13468         ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
13469         if (ret < 0)
13470                 goto out;
13471         while (1) {
13472                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13473                         if (!found)
13474                                 break;
13475
13476                         if (del_nr) {
13477                                 ret = btrfs_del_items(trans, root, &path,
13478                                                       del_slot, del_nr);
13479                                 del_nr = 0;
13480                                 if (ret)
13481                                         goto out;
13482                         }
13483                         key.offset++;
13484                         btrfs_release_path(&path);
13485
13486                         found = 0;
13487                         ret = btrfs_search_slot(trans, root, &key, &path,
13488                                                 -1, 1);
13489                         if (ret < 0)
13490                                 goto out;
13491                         continue;
13492                 }
13493                 found = 1;
13494                 leaf = path.nodes[0];
13495                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13496                 if (key.objectid > BTRFS_TREE_RELOC_OBJECTID)
13497                         break;
13498                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
13499                         path.slots[0]++;
13500                         continue;
13501                 }
13502                 if (!del_nr) {
13503                         del_slot = path.slots[0];
13504                         del_nr = 1;
13505                 } else {
13506                         del_nr++;
13507                 }
13508                 path.slots[0]++;
13509         }
13510
13511         if (del_nr) {
13512                 ret = btrfs_del_items(trans, root, &path, del_slot, del_nr);
13513                 if (ret)
13514                         goto out;
13515         }
13516         btrfs_release_path(&path);
13517
13518 reinit_data_reloc:
13519         key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
13520         key.type = BTRFS_ROOT_ITEM_KEY;
13521         key.offset = (u64)-1;
13522         root = btrfs_read_fs_root(fs_info, &key);
13523         if (IS_ERR(root)) {
13524                 fprintf(stderr, "Error reading data reloc tree\n");
13525                 ret = PTR_ERR(root);
13526                 goto out;
13527         }
13528         record_root_in_trans(trans, root);
13529         ret = btrfs_fsck_reinit_root(trans, root, 0);
13530         if (ret)
13531                 goto out;
13532         ret = btrfs_make_root_dir(trans, root, BTRFS_FIRST_FREE_OBJECTID);
13533 out:
13534         btrfs_release_path(&path);
13535         return ret;
13536 }
13537
13538 static int reinit_extent_tree(struct btrfs_trans_handle *trans,
13539                               struct btrfs_fs_info *fs_info)
13540 {
13541         u64 start = 0;
13542         int ret;
13543
13544         /*
13545          * The only reason we don't do this is because right now we're just
13546          * walking the trees we find and pinning down their bytes, we don't look
13547          * at any of the leaves.  In order to do mixed groups we'd have to check
13548          * the leaves of any fs roots and pin down the bytes for any file
13549          * extents we find.  Not hard but why do it if we don't have to?
13550          */
13551         if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
13552                 fprintf(stderr, "We don't support re-initing the extent tree "
13553                         "for mixed block groups yet, please notify a btrfs "
13554                         "developer you want to do this so they can add this "
13555                         "functionality.\n");
13556                 return -EINVAL;
13557         }
13558
13559         /*
13560          * first we need to walk all of the trees except the extent tree and pin
13561          * down the bytes that are in use so we don't overwrite any existing
13562          * metadata.
13563          */
13564         ret = pin_metadata_blocks(fs_info);
13565         if (ret) {
13566                 fprintf(stderr, "error pinning down used bytes\n");
13567                 return ret;
13568         }
13569
13570         /*
13571          * Need to drop all the block groups since we're going to recreate all
13572          * of them again.
13573          */
13574         btrfs_free_block_groups(fs_info);
13575         ret = reset_block_groups(fs_info);
13576         if (ret) {
13577                 fprintf(stderr, "error resetting the block groups\n");
13578                 return ret;
13579         }
13580
13581         /* Ok we can allocate now, reinit the extent root */
13582         ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 0);
13583         if (ret) {
13584                 fprintf(stderr, "extent root initialization failed\n");
13585                 /*
13586                  * When the transaction code is updated we should end the
13587                  * transaction, but for now progs only knows about commit so
13588                  * just return an error.
13589                  */
13590                 return ret;
13591         }
13592
13593         /*
13594          * Now we have all the in-memory block groups setup so we can make
13595          * allocations properly, and the metadata we care about is safe since we
13596          * pinned all of it above.
13597          */
13598         while (1) {
13599                 struct btrfs_block_group_cache *cache;
13600
13601                 cache = btrfs_lookup_first_block_group(fs_info, start);
13602                 if (!cache)
13603                         break;
13604                 start = cache->key.objectid + cache->key.offset;
13605                 ret = btrfs_insert_item(trans, fs_info->extent_root,
13606                                         &cache->key, &cache->item,
13607                                         sizeof(cache->item));
13608                 if (ret) {
13609                         fprintf(stderr, "Error adding block group\n");
13610                         return ret;
13611                 }
13612                 btrfs_extent_post_op(trans, fs_info->extent_root);
13613         }
13614
13615         ret = reset_balance(trans, fs_info);
13616         if (ret)
13617                 fprintf(stderr, "error resetting the pending balance\n");
13618
13619         return ret;
13620 }
13621
13622 static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb)
13623 {
13624         struct btrfs_path path;
13625         struct btrfs_trans_handle *trans;
13626         struct btrfs_key key;
13627         int ret;
13628
13629         printf("Recowing metadata block %llu\n", eb->start);
13630         key.objectid = btrfs_header_owner(eb);
13631         key.type = BTRFS_ROOT_ITEM_KEY;
13632         key.offset = (u64)-1;
13633
13634         root = btrfs_read_fs_root(root->fs_info, &key);
13635         if (IS_ERR(root)) {
13636                 fprintf(stderr, "Couldn't find owner root %llu\n",
13637                         key.objectid);
13638                 return PTR_ERR(root);
13639         }
13640
13641         trans = btrfs_start_transaction(root, 1);
13642         if (IS_ERR(trans))
13643                 return PTR_ERR(trans);
13644
13645         btrfs_init_path(&path);
13646         path.lowest_level = btrfs_header_level(eb);
13647         if (path.lowest_level)
13648                 btrfs_node_key_to_cpu(eb, &key, 0);
13649         else
13650                 btrfs_item_key_to_cpu(eb, &key, 0);
13651
13652         ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
13653         btrfs_commit_transaction(trans, root);
13654         btrfs_release_path(&path);
13655         return ret;
13656 }
13657
13658 static int delete_bad_item(struct btrfs_root *root, struct bad_item *bad)
13659 {
13660         struct btrfs_path path;
13661         struct btrfs_trans_handle *trans;
13662         struct btrfs_key key;
13663         int ret;
13664
13665         printf("Deleting bad item [%llu,%u,%llu]\n", bad->key.objectid,
13666                bad->key.type, bad->key.offset);
13667         key.objectid = bad->root_id;
13668         key.type = BTRFS_ROOT_ITEM_KEY;
13669         key.offset = (u64)-1;
13670
13671         root = btrfs_read_fs_root(root->fs_info, &key);
13672         if (IS_ERR(root)) {
13673                 fprintf(stderr, "Couldn't find owner root %llu\n",
13674                         key.objectid);
13675                 return PTR_ERR(root);
13676         }
13677
13678         trans = btrfs_start_transaction(root, 1);
13679         if (IS_ERR(trans))
13680                 return PTR_ERR(trans);
13681
13682         btrfs_init_path(&path);
13683         ret = btrfs_search_slot(trans, root, &bad->key, &path, -1, 1);
13684         if (ret) {
13685                 if (ret > 0)
13686                         ret = 0;
13687                 goto out;
13688         }
13689         ret = btrfs_del_item(trans, root, &path);
13690 out:
13691         btrfs_commit_transaction(trans, root);
13692         btrfs_release_path(&path);
13693         return ret;
13694 }
13695
13696 static int zero_log_tree(struct btrfs_root *root)
13697 {
13698         struct btrfs_trans_handle *trans;
13699         int ret;
13700
13701         trans = btrfs_start_transaction(root, 1);
13702         if (IS_ERR(trans)) {
13703                 ret = PTR_ERR(trans);
13704                 return ret;
13705         }
13706         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
13707         btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
13708         ret = btrfs_commit_transaction(trans, root);
13709         return ret;
13710 }
13711
13712 static int populate_csum(struct btrfs_trans_handle *trans,
13713                          struct btrfs_root *csum_root, char *buf, u64 start,
13714                          u64 len)
13715 {
13716         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13717         u64 offset = 0;
13718         u64 sectorsize;
13719         int ret = 0;
13720
13721         while (offset < len) {
13722                 sectorsize = fs_info->sectorsize;
13723                 ret = read_extent_data(fs_info, buf, start + offset,
13724                                        &sectorsize, 0);
13725                 if (ret)
13726                         break;
13727                 ret = btrfs_csum_file_block(trans, csum_root, start + len,
13728                                             start + offset, buf, sectorsize);
13729                 if (ret)
13730                         break;
13731                 offset += sectorsize;
13732         }
13733         return ret;
13734 }
13735
13736 static int fill_csum_tree_from_one_fs_root(struct btrfs_trans_handle *trans,
13737                                       struct btrfs_root *csum_root,
13738                                       struct btrfs_root *cur_root)
13739 {
13740         struct btrfs_path path;
13741         struct btrfs_key key;
13742         struct extent_buffer *node;
13743         struct btrfs_file_extent_item *fi;
13744         char *buf = NULL;
13745         u64 start = 0;
13746         u64 len = 0;
13747         int slot = 0;
13748         int ret = 0;
13749
13750         buf = malloc(cur_root->fs_info->sectorsize);
13751         if (!buf)
13752                 return -ENOMEM;
13753
13754         btrfs_init_path(&path);
13755         key.objectid = 0;
13756         key.offset = 0;
13757         key.type = 0;
13758         ret = btrfs_search_slot(NULL, cur_root, &key, &path, 0, 0);
13759         if (ret < 0)
13760                 goto out;
13761         /* Iterate all regular file extents and fill its csum */
13762         while (1) {
13763                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
13764
13765                 if (key.type != BTRFS_EXTENT_DATA_KEY)
13766                         goto next;
13767                 node = path.nodes[0];
13768                 slot = path.slots[0];
13769                 fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item);
13770                 if (btrfs_file_extent_type(node, fi) != BTRFS_FILE_EXTENT_REG)
13771                         goto next;
13772                 start = btrfs_file_extent_disk_bytenr(node, fi);
13773                 len = btrfs_file_extent_disk_num_bytes(node, fi);
13774
13775                 ret = populate_csum(trans, csum_root, buf, start, len);
13776                 if (ret == -EEXIST)
13777                         ret = 0;
13778                 if (ret < 0)
13779                         goto out;
13780 next:
13781                 /*
13782                  * TODO: if next leaf is corrupted, jump to nearest next valid
13783                  * leaf.
13784                  */
13785                 ret = btrfs_next_item(cur_root, &path);
13786                 if (ret < 0)
13787                         goto out;
13788                 if (ret > 0) {
13789                         ret = 0;
13790                         goto out;
13791                 }
13792         }
13793
13794 out:
13795         btrfs_release_path(&path);
13796         free(buf);
13797         return ret;
13798 }
13799
13800 static int fill_csum_tree_from_fs(struct btrfs_trans_handle *trans,
13801                                   struct btrfs_root *csum_root)
13802 {
13803         struct btrfs_fs_info *fs_info = csum_root->fs_info;
13804         struct btrfs_path path;
13805         struct btrfs_root *tree_root = fs_info->tree_root;
13806         struct btrfs_root *cur_root;
13807         struct extent_buffer *node;
13808         struct btrfs_key key;
13809         int slot = 0;
13810         int ret = 0;
13811
13812         btrfs_init_path(&path);
13813         key.objectid = BTRFS_FS_TREE_OBJECTID;
13814         key.offset = 0;
13815         key.type = BTRFS_ROOT_ITEM_KEY;
13816         ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
13817         if (ret < 0)
13818                 goto out;
13819         if (ret > 0) {
13820                 ret = -ENOENT;
13821                 goto out;
13822         }
13823
13824         while (1) {
13825                 node = path.nodes[0];
13826                 slot = path.slots[0];
13827                 btrfs_item_key_to_cpu(node, &key, slot);
13828                 if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
13829                         goto out;
13830                 if (key.type != BTRFS_ROOT_ITEM_KEY)
13831                         goto next;
13832                 if (!is_fstree(key.objectid))
13833                         goto next;
13834                 key.offset = (u64)-1;
13835
13836                 cur_root = btrfs_read_fs_root(fs_info, &key);
13837                 if (IS_ERR(cur_root) || !cur_root) {
13838                         fprintf(stderr, "Fail to read fs/subvol tree: %lld\n",
13839                                 key.objectid);
13840                         goto out;
13841                 }
13842                 ret = fill_csum_tree_from_one_fs_root(trans, csum_root,
13843                                 cur_root);
13844                 if (ret < 0)
13845                         goto out;
13846 next:
13847                 ret = btrfs_next_item(tree_root, &path);
13848                 if (ret > 0) {
13849                         ret = 0;
13850                         goto out;
13851                 }
13852                 if (ret < 0)
13853                         goto out;
13854         }
13855
13856 out:
13857         btrfs_release_path(&path);
13858         return ret;
13859 }
13860
13861 static int fill_csum_tree_from_extent(struct btrfs_trans_handle *trans,
13862                                       struct btrfs_root *csum_root)
13863 {
13864         struct btrfs_root *extent_root = csum_root->fs_info->extent_root;
13865         struct btrfs_path path;
13866         struct btrfs_extent_item *ei;
13867         struct extent_buffer *leaf;
13868         char *buf;
13869         struct btrfs_key key;
13870         int ret;
13871
13872         btrfs_init_path(&path);
13873         key.objectid = 0;
13874         key.type = BTRFS_EXTENT_ITEM_KEY;
13875         key.offset = 0;
13876         ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
13877         if (ret < 0) {
13878                 btrfs_release_path(&path);
13879                 return ret;
13880         }
13881
13882         buf = malloc(csum_root->fs_info->sectorsize);
13883         if (!buf) {
13884                 btrfs_release_path(&path);
13885                 return -ENOMEM;
13886         }
13887
13888         while (1) {
13889                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
13890                         ret = btrfs_next_leaf(extent_root, &path);
13891                         if (ret < 0)
13892                                 break;
13893                         if (ret) {
13894                                 ret = 0;
13895                                 break;
13896                         }
13897                 }
13898                 leaf = path.nodes[0];
13899
13900                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
13901                 if (key.type != BTRFS_EXTENT_ITEM_KEY) {
13902                         path.slots[0]++;
13903                         continue;
13904                 }
13905
13906                 ei = btrfs_item_ptr(leaf, path.slots[0],
13907                                     struct btrfs_extent_item);
13908                 if (!(btrfs_extent_flags(leaf, ei) &
13909                       BTRFS_EXTENT_FLAG_DATA)) {
13910                         path.slots[0]++;
13911                         continue;
13912                 }
13913
13914                 ret = populate_csum(trans, csum_root, buf, key.objectid,
13915                                     key.offset);
13916                 if (ret)
13917                         break;
13918                 path.slots[0]++;
13919         }
13920
13921         btrfs_release_path(&path);
13922         free(buf);
13923         return ret;
13924 }
13925
/*
 * Recompute the checksums and store them in the csum tree.
 *
 * After an extent tree init the extent info has been wiped, so the extent
 * tree cannot be used as the source of data extents.  When @search_fs_tree is
 * non-zero the fs/subvolume trees are walked instead; otherwise the extent
 * tree is used.
 */
static int fill_csum_tree(struct btrfs_trans_handle *trans,
                          struct btrfs_root *csum_root,
                          int search_fs_tree)
{
        if (!search_fs_tree)
                return fill_csum_tree_from_extent(trans, csum_root);

        return fill_csum_tree_from_fs(trans, csum_root);
}
13942
13943 static void free_roots_info_cache(void)
13944 {
13945         if (!roots_info_cache)
13946                 return;
13947
13948         while (!cache_tree_empty(roots_info_cache)) {
13949                 struct cache_extent *entry;
13950                 struct root_item_info *rii;
13951
13952                 entry = first_cache_extent(roots_info_cache);
13953                 if (!entry)
13954                         break;
13955                 remove_cache_extent(roots_info_cache, entry);
13956                 rii = container_of(entry, struct root_item_info, cache_extent);
13957                 free(rii);
13958         }
13959
13960         free(roots_info_cache);
13961         roots_info_cache = NULL;
13962 }
13963
13964 static int build_roots_info_cache(struct btrfs_fs_info *info)
13965 {
13966         int ret = 0;
13967         struct btrfs_key key;
13968         struct extent_buffer *leaf;
13969         struct btrfs_path path;
13970
13971         if (!roots_info_cache) {
13972                 roots_info_cache = malloc(sizeof(*roots_info_cache));
13973                 if (!roots_info_cache)
13974                         return -ENOMEM;
13975                 cache_tree_init(roots_info_cache);
13976         }
13977
13978         btrfs_init_path(&path);
13979         key.objectid = 0;
13980         key.type = BTRFS_EXTENT_ITEM_KEY;
13981         key.offset = 0;
13982         ret = btrfs_search_slot(NULL, info->extent_root, &key, &path, 0, 0);
13983         if (ret < 0)
13984                 goto out;
13985         leaf = path.nodes[0];
13986
13987         while (1) {
13988                 struct btrfs_key found_key;
13989                 struct btrfs_extent_item *ei;
13990                 struct btrfs_extent_inline_ref *iref;
13991                 int slot = path.slots[0];
13992                 int type;
13993                 u64 flags;
13994                 u64 root_id;
13995                 u8 level;
13996                 struct cache_extent *entry;
13997                 struct root_item_info *rii;
13998
13999                 if (slot >= btrfs_header_nritems(leaf)) {
14000                         ret = btrfs_next_leaf(info->extent_root, &path);
14001                         if (ret < 0) {
14002                                 break;
14003                         } else if (ret) {
14004                                 ret = 0;
14005                                 break;
14006                         }
14007                         leaf = path.nodes[0];
14008                         slot = path.slots[0];
14009                 }
14010
14011                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14012
14013                 if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
14014                     found_key.type != BTRFS_METADATA_ITEM_KEY)
14015                         goto next;
14016
14017                 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
14018                 flags = btrfs_extent_flags(leaf, ei);
14019
14020                 if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
14021                     !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
14022                         goto next;
14023
14024                 if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
14025                         iref = (struct btrfs_extent_inline_ref *)(ei + 1);
14026                         level = found_key.offset;
14027                 } else {
14028                         struct btrfs_tree_block_info *binfo;
14029
14030                         binfo = (struct btrfs_tree_block_info *)(ei + 1);
14031                         iref = (struct btrfs_extent_inline_ref *)(binfo + 1);
14032                         level = btrfs_tree_block_level(leaf, binfo);
14033                 }
14034
14035                 /*
14036                  * For a root extent, it must be of the following type and the
14037                  * first (and only one) iref in the item.
14038                  */
14039                 type = btrfs_extent_inline_ref_type(leaf, iref);
14040                 if (type != BTRFS_TREE_BLOCK_REF_KEY)
14041                         goto next;
14042
14043                 root_id = btrfs_extent_inline_ref_offset(leaf, iref);
14044                 entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14045                 if (!entry) {
14046                         rii = malloc(sizeof(struct root_item_info));
14047                         if (!rii) {
14048                                 ret = -ENOMEM;
14049                                 goto out;
14050                         }
14051                         rii->cache_extent.start = root_id;
14052                         rii->cache_extent.size = 1;
14053                         rii->level = (u8)-1;
14054                         entry = &rii->cache_extent;
14055                         ret = insert_cache_extent(roots_info_cache, entry);
14056                         ASSERT(ret == 0);
14057                 } else {
14058                         rii = container_of(entry, struct root_item_info,
14059                                            cache_extent);
14060                 }
14061
14062                 ASSERT(rii->cache_extent.start == root_id);
14063                 ASSERT(rii->cache_extent.size == 1);
14064
14065                 if (level > rii->level || rii->level == (u8)-1) {
14066                         rii->level = level;
14067                         rii->bytenr = found_key.objectid;
14068                         rii->gen = btrfs_extent_generation(leaf, ei);
14069                         rii->node_count = 1;
14070                 } else if (level == rii->level) {
14071                         rii->node_count++;
14072                 }
14073 next:
14074                 path.slots[0]++;
14075         }
14076
14077 out:
14078         btrfs_release_path(&path);
14079
14080         return ret;
14081 }
14082
14083 static int maybe_repair_root_item(struct btrfs_path *path,
14084                                   const struct btrfs_key *root_key,
14085                                   const int read_only_mode)
14086 {
14087         const u64 root_id = root_key->objectid;
14088         struct cache_extent *entry;
14089         struct root_item_info *rii;
14090         struct btrfs_root_item ri;
14091         unsigned long offset;
14092
14093         entry = lookup_cache_extent(roots_info_cache, root_id, 1);
14094         if (!entry) {
14095                 fprintf(stderr,
14096                         "Error: could not find extent items for root %llu\n",
14097                         root_key->objectid);
14098                 return -ENOENT;
14099         }
14100
14101         rii = container_of(entry, struct root_item_info, cache_extent);
14102         ASSERT(rii->cache_extent.start == root_id);
14103         ASSERT(rii->cache_extent.size == 1);
14104
14105         if (rii->node_count != 1) {
14106                 fprintf(stderr,
14107                         "Error: could not find btree root extent for root %llu\n",
14108                         root_id);
14109                 return -ENOENT;
14110         }
14111
14112         offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
14113         read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
14114
14115         if (btrfs_root_bytenr(&ri) != rii->bytenr ||
14116             btrfs_root_level(&ri) != rii->level ||
14117             btrfs_root_generation(&ri) != rii->gen) {
14118
14119                 /*
14120                  * If we're in repair mode but our caller told us to not update
14121                  * the root item, i.e. just check if it needs to be updated, don't
14122                  * print this message, since the caller will call us again shortly
14123                  * for the same root item without read only mode (the caller will
14124                  * open a transaction first).
14125                  */
14126                 if (!(read_only_mode && repair))
14127                         fprintf(stderr,
14128                                 "%sroot item for root %llu,"
14129                                 " current bytenr %llu, current gen %llu, current level %u,"
14130                                 " new bytenr %llu, new gen %llu, new level %u\n",
14131                                 (read_only_mode ? "" : "fixing "),
14132                                 root_id,
14133                                 btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
14134                                 btrfs_root_level(&ri),
14135                                 rii->bytenr, rii->gen, rii->level);
14136
14137                 if (btrfs_root_generation(&ri) > rii->gen) {
14138                         fprintf(stderr,
14139                                 "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
14140                                 root_id, btrfs_root_generation(&ri), rii->gen);
14141                         return -EINVAL;
14142                 }
14143
14144                 if (!read_only_mode) {
14145                         btrfs_set_root_bytenr(&ri, rii->bytenr);
14146                         btrfs_set_root_level(&ri, rii->level);
14147                         btrfs_set_root_generation(&ri, rii->gen);
14148                         write_extent_buffer(path->nodes[0], &ri,
14149                                             offset, sizeof(ri));
14150                 }
14151
14152                 return 1;
14153         }
14154
14155         return 0;
14156 }
14157
14158 /*
14159  * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
14160  * caused read-only snapshots to be corrupted if they were created at a moment
14161  * when the source subvolume/snapshot had orphan items. The issue was that the
14162  * on-disk root items became incorrect, referring to the pre orphan cleanup root
14163  * node instead of the post orphan cleanup root node.
14164  * So this function, and its callees, just detects and fixes those cases. Even
14165  * though the regression was for read-only snapshots, this function applies to
14166  * any snapshot/subvolume root.
14167  * This must be run before any other repair code - not doing it so, makes other
14168  * repair code delete or modify backrefs in the extent tree for example, which
14169  * will result in an inconsistent fs after repairing the root items.
14170  */
14171 static int repair_root_items(struct btrfs_fs_info *info)
14172 {
14173         struct btrfs_path path;
14174         struct btrfs_key key;
14175         struct extent_buffer *leaf;
14176         struct btrfs_trans_handle *trans = NULL;
14177         int ret = 0;
14178         int bad_roots = 0;
14179         int need_trans = 0;
14180
14181         btrfs_init_path(&path);
14182
14183         ret = build_roots_info_cache(info);
14184         if (ret)
14185                 goto out;
14186
14187         key.objectid = BTRFS_FIRST_FREE_OBJECTID;
14188         key.type = BTRFS_ROOT_ITEM_KEY;
14189         key.offset = 0;
14190
14191 again:
14192         /*
14193          * Avoid opening and committing transactions if a leaf doesn't have
14194          * any root items that need to be fixed, so that we avoid rotating
14195          * backup roots unnecessarily.
14196          */
14197         if (need_trans) {
14198                 trans = btrfs_start_transaction(info->tree_root, 1);
14199                 if (IS_ERR(trans)) {
14200                         ret = PTR_ERR(trans);
14201                         goto out;
14202                 }
14203         }
14204
14205         ret = btrfs_search_slot(trans, info->tree_root, &key, &path,
14206                                 0, trans ? 1 : 0);
14207         if (ret < 0)
14208                 goto out;
14209         leaf = path.nodes[0];
14210
14211         while (1) {
14212                 struct btrfs_key found_key;
14213
14214                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
14215                         int no_more_keys = find_next_key(&path, &key);
14216
14217                         btrfs_release_path(&path);
14218                         if (trans) {
14219                                 ret = btrfs_commit_transaction(trans,
14220                                                                info->tree_root);
14221                                 trans = NULL;
14222                                 if (ret < 0)
14223                                         goto out;
14224                         }
14225                         need_trans = 0;
14226                         if (no_more_keys)
14227                                 break;
14228                         goto again;
14229                 }
14230
14231                 btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]);
14232
14233                 if (found_key.type != BTRFS_ROOT_ITEM_KEY)
14234                         goto next;
14235                 if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
14236                         goto next;
14237
14238                 ret = maybe_repair_root_item(&path, &found_key, trans ? 0 : 1);
14239                 if (ret < 0)
14240                         goto out;
14241                 if (ret) {
14242                         if (!trans && repair) {
14243                                 need_trans = 1;
14244                                 key = found_key;
14245                                 btrfs_release_path(&path);
14246                                 goto again;
14247                         }
14248                         bad_roots++;
14249                 }
14250 next:
14251                 path.slots[0]++;
14252         }
14253         ret = 0;
14254 out:
14255         free_roots_info_cache();
14256         btrfs_release_path(&path);
14257         if (trans)
14258                 btrfs_commit_transaction(trans, info->tree_root);
14259         if (ret < 0)
14260                 return ret;
14261
14262         return bad_roots;
14263 }
14264
14265 static int clear_free_space_cache(struct btrfs_fs_info *fs_info)
14266 {
14267         struct btrfs_trans_handle *trans;
14268         struct btrfs_block_group_cache *bg_cache;
14269         u64 current = 0;
14270         int ret = 0;
14271
14272         /* Clear all free space cache inodes and its extent data */
14273         while (1) {
14274                 bg_cache = btrfs_lookup_first_block_group(fs_info, current);
14275                 if (!bg_cache)
14276                         break;
14277                 ret = btrfs_clear_free_space_cache(fs_info, bg_cache);
14278                 if (ret < 0)
14279                         return ret;
14280                 current = bg_cache->key.objectid + bg_cache->key.offset;
14281         }
14282
14283         /* Don't forget to set cache_generation to -1 */
14284         trans = btrfs_start_transaction(fs_info->tree_root, 0);
14285         if (IS_ERR(trans)) {
14286                 error("failed to update super block cache generation");
14287                 return PTR_ERR(trans);
14288         }
14289         btrfs_set_super_cache_generation(fs_info->super_copy, (u64)-1);
14290         btrfs_commit_transaction(trans, fs_info->tree_root);
14291
14292         return ret;
14293 }
14294
14295 static int do_clear_free_space_cache(struct btrfs_fs_info *fs_info,
14296                 int clear_version)
14297 {
14298         int ret = 0;
14299
14300         if (clear_version == 1) {
14301                 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14302                         error(
14303                 "free space cache v2 detected, use --clear-space-cache v2");
14304                         ret = 1;
14305                         goto close_out;
14306                 }
14307                 printf("Clearing free space cache\n");
14308                 ret = clear_free_space_cache(fs_info);
14309                 if (ret) {
14310                         error("failed to clear free space cache");
14311                         ret = 1;
14312                 } else {
14313                         printf("Free space cache cleared\n");
14314                 }
14315         } else if (clear_version == 2) {
14316                 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
14317                         printf("no free space cache v2 to clear\n");
14318                         ret = 0;
14319                         goto close_out;
14320                 }
14321                 printf("Clear free space cache v2\n");
14322                 ret = btrfs_clear_free_space_tree(fs_info);
14323                 if (ret) {
14324                         error("failed to clear free space cache v2: %d", ret);
14325                         ret = 1;
14326                 } else {
14327                         printf("free space cache v2 cleared\n");
14328                 }
14329         }
14330 close_out:
14331         return ret;
14332 }
14333
/*
 * Help text of "btrfs check", passed to usage() by cmd_check().
 *
 * NULL-terminated list of lines: the synopsis comes first, followed by the
 * description, an empty separator line and one or more lines per option.
 */
const char *const cmd_check_usage[] = {
	/* synopsis */
	"btrfs check [options] <device>",
	/* description */
	"Check structural integrity of a filesystem (unmounted).",
	"Check structural integrity of an unmounted filesystem. Verify internal",
	"trees' consistency and item connectivity. In the repair mode try to",
	"fix the problems found. ",
	"WARNING: the repair mode is considered dangerous",
	"",
	/* options */
	"-s|--super <superblock>     use this superblock copy",
	"-b|--backup                 use the first valid backup root copy",
	"--force                     skip mount checks, repair is not possible",
	"--repair                    try to repair the filesystem",
	"--readonly                  run in read-only mode (default)",
	"--init-csum-tree            create a new CRC tree",
	"--init-extent-tree          create a new extent tree",
	"--mode <MODE>               allows choice of memory/IO trade-offs",
	"                            where MODE is one of:",
	"                            original - read inodes and extents to memory (requires",
	"                                       more memory, does less IO)",
	"                            lowmem   - try to use less memory but read blocks again",
	"                                       when needed",
	"--check-data-csum           verify checksums of data blocks",
	"-Q|--qgroup-report          print a report on qgroup consistency",
	"-E|--subvol-extents <subvolid>",
	"                            print subvolume extents and sharing state",
	"-r|--tree-root <bytenr>     use the given bytenr for the tree root",
	"--chunk-root <bytenr>       use the given bytenr for the chunk tree root",
	"-p|--progress               indicate progress",
	"--clear-space-cache v1|v2   clear space cache for v1 or v2",
	NULL
};
14365
14366 int cmd_check(int argc, char **argv)
14367 {
14368         struct cache_tree root_cache;
14369         struct btrfs_root *root;
14370         struct btrfs_fs_info *info;
14371         u64 bytenr = 0;
14372         u64 subvolid = 0;
14373         u64 tree_root_bytenr = 0;
14374         u64 chunk_root_bytenr = 0;
14375         char uuidbuf[BTRFS_UUID_UNPARSED_SIZE];
14376         int ret = 0;
14377         int err = 0;
14378         u64 num;
14379         int init_csum_tree = 0;
14380         int readonly = 0;
14381         int clear_space_cache = 0;
14382         int qgroup_report = 0;
14383         int qgroups_repaired = 0;
14384         unsigned ctree_flags = OPEN_CTREE_EXCLUSIVE;
14385         int force = 0;
14386
14387         while(1) {
14388                 int c;
14389                 enum { GETOPT_VAL_REPAIR = 257, GETOPT_VAL_INIT_CSUM,
14390                         GETOPT_VAL_INIT_EXTENT, GETOPT_VAL_CHECK_CSUM,
14391                         GETOPT_VAL_READONLY, GETOPT_VAL_CHUNK_TREE,
14392                         GETOPT_VAL_MODE, GETOPT_VAL_CLEAR_SPACE_CACHE,
14393                         GETOPT_VAL_FORCE };
14394                 static const struct option long_options[] = {
14395                         { "super", required_argument, NULL, 's' },
14396                         { "repair", no_argument, NULL, GETOPT_VAL_REPAIR },
14397                         { "readonly", no_argument, NULL, GETOPT_VAL_READONLY },
14398                         { "init-csum-tree", no_argument, NULL,
14399                                 GETOPT_VAL_INIT_CSUM },
14400                         { "init-extent-tree", no_argument, NULL,
14401                                 GETOPT_VAL_INIT_EXTENT },
14402                         { "check-data-csum", no_argument, NULL,
14403                                 GETOPT_VAL_CHECK_CSUM },
14404                         { "backup", no_argument, NULL, 'b' },
14405                         { "subvol-extents", required_argument, NULL, 'E' },
14406                         { "qgroup-report", no_argument, NULL, 'Q' },
14407                         { "tree-root", required_argument, NULL, 'r' },
14408                         { "chunk-root", required_argument, NULL,
14409                                 GETOPT_VAL_CHUNK_TREE },
14410                         { "progress", no_argument, NULL, 'p' },
14411                         { "mode", required_argument, NULL,
14412                                 GETOPT_VAL_MODE },
14413                         { "clear-space-cache", required_argument, NULL,
14414                                 GETOPT_VAL_CLEAR_SPACE_CACHE},
14415                         { "force", no_argument, NULL, GETOPT_VAL_FORCE },
14416                         { NULL, 0, NULL, 0}
14417                 };
14418
14419                 c = getopt_long(argc, argv, "as:br:pEQ", long_options, NULL);
14420                 if (c < 0)
14421                         break;
14422                 switch(c) {
14423                         case 'a': /* ignored */ break;
14424                         case 'b':
14425                                 ctree_flags |= OPEN_CTREE_BACKUP_ROOT;
14426                                 break;
14427                         case 's':
14428                                 num = arg_strtou64(optarg);
14429                                 if (num >= BTRFS_SUPER_MIRROR_MAX) {
14430                                         error(
14431                                         "super mirror should be less than %d",
14432                                                 BTRFS_SUPER_MIRROR_MAX);
14433                                         exit(1);
14434                                 }
14435                                 bytenr = btrfs_sb_offset(((int)num));
14436                                 printf("using SB copy %llu, bytenr %llu\n", num,
14437                                        (unsigned long long)bytenr);
14438                                 break;
14439                         case 'Q':
14440                                 qgroup_report = 1;
14441                                 break;
14442                         case 'E':
14443                                 subvolid = arg_strtou64(optarg);
14444                                 break;
14445                         case 'r':
14446                                 tree_root_bytenr = arg_strtou64(optarg);
14447                                 break;
14448                         case GETOPT_VAL_CHUNK_TREE:
14449                                 chunk_root_bytenr = arg_strtou64(optarg);
14450                                 break;
14451                         case 'p':
14452                                 ctx.progress_enabled = true;
14453                                 break;
14454                         case '?':
14455                         case 'h':
14456                                 usage(cmd_check_usage);
14457                         case GETOPT_VAL_REPAIR:
14458                                 printf("enabling repair mode\n");
14459                                 repair = 1;
14460                                 ctree_flags |= OPEN_CTREE_WRITES;
14461                                 break;
14462                         case GETOPT_VAL_READONLY:
14463                                 readonly = 1;
14464                                 break;
14465                         case GETOPT_VAL_INIT_CSUM:
14466                                 printf("Creating a new CRC tree\n");
14467                                 init_csum_tree = 1;
14468                                 repair = 1;
14469                                 ctree_flags |= OPEN_CTREE_WRITES;
14470                                 break;
14471                         case GETOPT_VAL_INIT_EXTENT:
14472                                 init_extent_tree = 1;
14473                                 ctree_flags |= (OPEN_CTREE_WRITES |
14474                                                 OPEN_CTREE_NO_BLOCK_GROUPS);
14475                                 repair = 1;
14476                                 break;
14477                         case GETOPT_VAL_CHECK_CSUM:
14478                                 check_data_csum = 1;
14479                                 break;
14480                         case GETOPT_VAL_MODE:
14481                                 check_mode = parse_check_mode(optarg);
14482                                 if (check_mode == CHECK_MODE_UNKNOWN) {
14483                                         error("unknown mode: %s", optarg);
14484                                         exit(1);
14485                                 }
14486                                 break;
14487                         case GETOPT_VAL_CLEAR_SPACE_CACHE:
14488                                 if (strcmp(optarg, "v1") == 0) {
14489                                         clear_space_cache = 1;
14490                                 } else if (strcmp(optarg, "v2") == 0) {
14491                                         clear_space_cache = 2;
14492                                         ctree_flags |= OPEN_CTREE_INVALIDATE_FST;
14493                                 } else {
14494                                         error(
14495                 "invalid argument to --clear-space-cache, must be v1 or v2");
14496                                         exit(1);
14497                                 }
14498                                 ctree_flags |= OPEN_CTREE_WRITES;
14499                                 break;
14500                         case GETOPT_VAL_FORCE:
14501                                 force = 1;
14502                                 break;
14503                 }
14504         }
14505
14506         if (check_argc_exact(argc - optind, 1))
14507                 usage(cmd_check_usage);
14508
14509         if (ctx.progress_enabled) {
14510                 ctx.tp = TASK_NOTHING;
14511                 ctx.info = task_init(print_status_check, print_status_return, &ctx);
14512         }
14513
14514         /* This check is the only reason for --readonly to exist */
14515         if (readonly && repair) {
14516                 error("repair options are not compatible with --readonly");
14517                 exit(1);
14518         }
14519
14520         /*
14521          * experimental and dangerous
14522          */
14523         if (repair && check_mode == CHECK_MODE_LOWMEM)
14524                 warning("low-memory mode repair support is only partial");
14525
14526         radix_tree_init();
14527         cache_tree_init(&root_cache);
14528
14529         ret = check_mounted(argv[optind]);
14530         if (!force) {
14531                 if (ret < 0) {
14532                         error("could not check mount status: %s",
14533                                         strerror(-ret));
14534                         err |= !!ret;
14535                         goto err_out;
14536                 } else if (ret) {
14537                         error(
14538 "%s is currently mounted, use --force if you really intend to check the filesystem",
14539                                 argv[optind]);
14540                         ret = -EBUSY;
14541                         err |= !!ret;
14542                         goto err_out;
14543                 }
14544         } else {
14545                 if (repair) {
14546                         error("repair and --force is not yet supported");
14547                         ret = 1;
14548                         err |= !!ret;
14549                         goto err_out;
14550                 }
14551                 if (ret < 0) {
14552                         warning(
14553 "cannot check mount status of %s, the filesystem could be mounted, continuing because of --force",
14554                                 argv[optind]);
14555                 } else if (ret) {
14556                         warning(
14557                         "filesystem mounted, continuing because of --force");
14558                 }
14559                 /* A block device is mounted in exclusive mode by kernel */
14560                 ctree_flags &= ~OPEN_CTREE_EXCLUSIVE;
14561         }
14562
14563         /* only allow partial opening under repair mode */
14564         if (repair)
14565                 ctree_flags |= OPEN_CTREE_PARTIAL;
14566
14567         info = open_ctree_fs_info(argv[optind], bytenr, tree_root_bytenr,
14568                                   chunk_root_bytenr, ctree_flags);
14569         if (!info) {
14570                 error("cannot open file system");
14571                 ret = -EIO;
14572                 err |= !!ret;
14573                 goto err_out;
14574         }
14575
14576         global_info = info;
14577         root = info->fs_root;
14578         uuid_unparse(info->super_copy->fsid, uuidbuf);
14579
14580         printf("Checking filesystem on %s\nUUID: %s\n", argv[optind], uuidbuf);
14581
14582         /*
14583          * Check the bare minimum before starting anything else that could rely
14584          * on it, namely the tree roots, any local consistency checks
14585          */
14586         if (!extent_buffer_uptodate(info->tree_root->node) ||
14587             !extent_buffer_uptodate(info->dev_root->node) ||
14588             !extent_buffer_uptodate(info->chunk_root->node)) {
14589                 error("critical roots corrupted, unable to check the filesystem");
14590                 err |= !!ret;
14591                 ret = -EIO;
14592                 goto close_out;
14593         }
14594
14595         if (clear_space_cache) {
14596                 ret = do_clear_free_space_cache(info, clear_space_cache);
14597                 err |= !!ret;
14598                 goto close_out;
14599         }
14600
14601         /*
14602          * repair mode will force us to commit transaction which
14603          * will make us fail to load log tree when mounting.
14604          */
14605         if (repair && btrfs_super_log_root(info->super_copy)) {
14606                 ret = ask_user("repair mode will force to clear out log tree, are you sure?");
14607                 if (!ret) {
14608                         ret = 1;
14609                         err |= !!ret;
14610                         goto close_out;
14611                 }
14612                 ret = zero_log_tree(root);
14613                 err |= !!ret;
14614                 if (ret) {
14615                         error("failed to zero log tree: %d", ret);
14616                         goto close_out;
14617                 }
14618         }
14619
14620         if (qgroup_report) {
14621                 printf("Print quota groups for %s\nUUID: %s\n", argv[optind],
14622                        uuidbuf);
14623                 ret = qgroup_verify_all(info);
14624                 err |= !!ret;
14625                 if (ret == 0)
14626                         report_qgroups(1);
14627                 goto close_out;
14628         }
14629         if (subvolid) {
14630                 printf("Print extent state for subvolume %llu on %s\nUUID: %s\n",
14631                        subvolid, argv[optind], uuidbuf);
14632                 ret = print_extent_state(info, subvolid);
14633                 err |= !!ret;
14634                 goto close_out;
14635         }
14636
14637         if (init_extent_tree || init_csum_tree) {
14638                 struct btrfs_trans_handle *trans;
14639
14640                 trans = btrfs_start_transaction(info->extent_root, 0);
14641                 if (IS_ERR(trans)) {
14642                         error("error starting transaction");
14643                         ret = PTR_ERR(trans);
14644                         err |= !!ret;
14645                         goto close_out;
14646                 }
14647
14648                 if (init_extent_tree) {
14649                         printf("Creating a new extent tree\n");
14650                         ret = reinit_extent_tree(trans, info);
14651                         err |= !!ret;
14652                         if (ret)
14653                                 goto close_out;
14654                 }
14655
14656                 if (init_csum_tree) {
14657                         printf("Reinitialize checksum tree\n");
14658                         ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0);
14659                         if (ret) {
14660                                 error("checksum tree initialization failed: %d",
14661                                                 ret);
14662                                 ret = -EIO;
14663                                 err |= !!ret;
14664                                 goto close_out;
14665                         }
14666
14667                         ret = fill_csum_tree(trans, info->csum_root,
14668                                              init_extent_tree);
14669                         err |= !!ret;
14670                         if (ret) {
14671                                 error("checksum tree refilling failed: %d", ret);
14672                                 return -EIO;
14673                         }
14674                 }
14675                 /*
14676                  * Ok now we commit and run the normal fsck, which will add
14677                  * extent entries for all of the items it finds.
14678                  */
14679                 ret = btrfs_commit_transaction(trans, info->extent_root);
14680                 err |= !!ret;
14681                 if (ret)
14682                         goto close_out;
14683         }
14684         if (!extent_buffer_uptodate(info->extent_root->node)) {
14685                 error("critical: extent_root, unable to check the filesystem");
14686                 ret = -EIO;
14687                 err |= !!ret;
14688                 goto close_out;
14689         }
14690         if (!extent_buffer_uptodate(info->csum_root->node)) {
14691                 error("critical: csum_root, unable to check the filesystem");
14692                 ret = -EIO;
14693                 err |= !!ret;
14694                 goto close_out;
14695         }
14696
14697         if (!init_extent_tree) {
14698                 ret = repair_root_items(info);
14699                 if (ret < 0) {
14700                         err = !!ret;
14701                         error("failed to repair root items: %s", strerror(-ret));
14702                         goto close_out;
14703                 }
14704                 if (repair) {
14705                         fprintf(stderr, "Fixed %d roots.\n", ret);
14706                         ret = 0;
14707                 } else if (ret > 0) {
14708                         fprintf(stderr,
14709                                 "Found %d roots with an outdated root item.\n",
14710                                 ret);
14711                         fprintf(stderr,
14712         "Please run a filesystem check with the option --repair to fix them.\n");
14713                         ret = 1;
14714                         err |= ret;
14715                         goto close_out;
14716                 }
14717         }
14718
14719         ret = do_check_chunks_and_extents(info);
14720         err |= !!ret;
14721         if (ret)
14722                 error(
14723                 "errors found in extent allocation tree or chunk allocation");
14724
14725         /* Only re-check super size after we checked and repaired the fs */
14726         err |= !is_super_size_valid(info);
14727
14728         if (!ctx.progress_enabled) {
14729                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14730                         fprintf(stderr, "checking free space tree\n");
14731                 else
14732                         fprintf(stderr, "checking free space cache\n");
14733         }
14734         ret = check_space_cache(root);
14735         err |= !!ret;
14736         if (ret) {
14737                 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
14738                         error("errors found in free space tree");
14739                 else
14740                         error("errors found in free space cache");
14741                 goto out;
14742         }
14743
14744         /*
14745          * We used to have to have these hole extents in between our real
14746          * extents so if we don't have this flag set we need to make sure there
14747          * are no gaps in the file extents for inodes, otherwise we can just
14748          * ignore it when this happens.
14749          */
14750         no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
14751         ret = do_check_fs_roots(info, &root_cache);
14752         err |= !!ret;
14753         if (ret) {
14754                 error("errors found in fs roots");
14755                 goto out;
14756         }
14757
14758         fprintf(stderr, "checking csums\n");
14759         ret = check_csums(root);
14760         err |= !!ret;
14761         if (ret) {
14762                 error("errors found in csum tree");
14763                 goto out;
14764         }
14765
14766         fprintf(stderr, "checking root refs\n");
14767         /* For low memory mode, check_fs_roots_v2 handles root refs */
14768         if (check_mode != CHECK_MODE_LOWMEM) {
14769                 ret = check_root_refs(root, &root_cache);
14770                 err |= !!ret;
14771                 if (ret) {
14772                         error("errors found in root refs");
14773                         goto out;
14774                 }
14775         }
14776
14777         while (repair && !list_empty(&root->fs_info->recow_ebs)) {
14778                 struct extent_buffer *eb;
14779
14780                 eb = list_first_entry(&root->fs_info->recow_ebs,
14781                                       struct extent_buffer, recow);
14782                 list_del_init(&eb->recow);
14783                 ret = recow_extent_buffer(root, eb);
14784                 err |= !!ret;
14785                 if (ret) {
14786                         error("fails to fix transid errors");
14787                         break;
14788                 }
14789         }
14790
14791         while (!list_empty(&delete_items)) {
14792                 struct bad_item *bad;
14793
14794                 bad = list_first_entry(&delete_items, struct bad_item, list);
14795                 list_del_init(&bad->list);
14796                 if (repair) {
14797                         ret = delete_bad_item(root, bad);
14798                         err |= !!ret;
14799                 }
14800                 free(bad);
14801         }
14802
14803         if (info->quota_enabled) {
14804                 fprintf(stderr, "checking quota groups\n");
14805                 ret = qgroup_verify_all(info);
14806                 err |= !!ret;
14807                 if (ret) {
14808                         error("failed to check quota groups");
14809                         goto out;
14810                 }
14811                 report_qgroups(0);
14812                 ret = repair_qgroups(info, &qgroups_repaired);
14813                 err |= !!ret;
14814                 if (err) {
14815                         error("failed to repair quota groups");
14816                         goto out;
14817                 }
14818                 ret = 0;
14819         }
14820
14821         if (!list_empty(&root->fs_info->recow_ebs)) {
14822                 error("transid errors in file system");
14823                 ret = 1;
14824                 err |= !!ret;
14825         }
14826 out:
14827         printf("found %llu bytes used, ",
14828                (unsigned long long)bytes_used);
14829         if (err)
14830                 printf("error(s) found\n");
14831         else
14832                 printf("no error found\n");
14833         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
14834         printf("total tree bytes: %llu\n",
14835                (unsigned long long)total_btree_bytes);
14836         printf("total fs tree bytes: %llu\n",
14837                (unsigned long long)total_fs_tree_bytes);
14838         printf("total extent tree bytes: %llu\n",
14839                (unsigned long long)total_extent_tree_bytes);
14840         printf("btree space waste bytes: %llu\n",
14841                (unsigned long long)btree_space_waste);
14842         printf("file data blocks allocated: %llu\n referenced %llu\n",
14843                 (unsigned long long)data_bytes_allocated,
14844                 (unsigned long long)data_bytes_referenced);
14845
14846         free_qgroup_counts();
14847         free_root_recs_tree(&root_cache);
14848 close_out:
14849         close_ctree(root);
14850 err_out:
14851         if (ctx.progress_enabled)
14852                 task_deinit(ctx.info);
14853
14854         return err;
14855 }