add GPLv2
[platform/upstream/btrfs-progs.git] / btrfsck.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 500
20 #include <stdio.h>
21 #include <stdlib.h>
22 #define __USE_GNU
23 #include <fcntl.h>
24 #include "kerncompat.h"
25 #include "radix-tree.h"
26 #include "ctree.h"
27 #include "disk-io.h"
28 #include "print-tree.h"
29 #include "transaction.h"
30 #include "bit-radix.h"
31
32 static u64 blocks_used = 0;
33 static u64 total_csum_bytes = 0;
34 static u64 total_btree_blocks = 0;
35 static u64 btree_space_waste = 0;
36
37 struct extent_record {
38         struct btrfs_disk_key parent_key;
39         u64 start;
40         u64 nr;
41         u64 owner;
42         u32 refs;
43         u32 extent_item_refs;
44         int checked;
45 };
46
47 static int check_node(struct btrfs_root *root,
48                       struct btrfs_disk_key *parent_key,
49                       struct btrfs_node *node)
50 {
51         int i;
52         u32 nritems = btrfs_header_nritems(&node->header);
53
54         if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(root))
55                 return 1;
56         if (parent_key->flags) {
57                 if (memcmp(parent_key, &node->ptrs[0].key,
58                               sizeof(struct btrfs_disk_key)))
59                         return 1;
60         }
61         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
62                 struct btrfs_key cpukey;
63                 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
64                 if (btrfs_comp_keys(&node->ptrs[i].key, &cpukey) >= 0)
65                         return 1;
66         }
67         return 0;
68 }
69
70 static int check_leaf(struct btrfs_root *root,
71                       struct btrfs_disk_key *parent_key,
72                       struct btrfs_leaf *leaf)
73 {
74         int i;
75         u32 nritems = btrfs_header_nritems(&leaf->header);
76
77         if (btrfs_header_level(&leaf->header) != 0) {
78                 fprintf(stderr, "leaf is not a leaf %llu\n",
79                        (unsigned long long)btrfs_header_blocknr(&leaf->header));
80                 return 1;
81         }
82         if (btrfs_leaf_free_space(root, leaf) < 0) {
83                 fprintf(stderr, "leaf free space incorrect %llu %d\n",
84                         (unsigned long long)btrfs_header_blocknr(&leaf->header),
85                         btrfs_leaf_free_space(root, leaf));
86                 return 1;
87         }
88
89         if (nritems == 0)
90                 return 0;
91
92         if (parent_key->flags && memcmp(parent_key, &leaf->items[0].key,
93                                         sizeof(struct btrfs_disk_key))) {
94                 fprintf(stderr, "leaf parent key incorrect %llu\n",
95                        (unsigned long long)btrfs_header_blocknr(&leaf->header));
96                 return 1;
97         }
98         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
99                 struct btrfs_key cpukey;
100                 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
101                 if (btrfs_comp_keys(&leaf->items[i].key,
102                                  &cpukey) >= 0)
103                         return 1;
104                 if (btrfs_item_offset(leaf->items + i) !=
105                         btrfs_item_end(leaf->items + i + 1))
106                         return 1;
107                 if (i == 0) {
108                         if (btrfs_item_offset(leaf->items + i) +
109                                btrfs_item_size(leaf->items + i) !=
110                                BTRFS_LEAF_DATA_SIZE(root))
111                                 return 1;
112                 }
113         }
114         return 0;
115 }
116
117 static int maybe_free_extent_rec(struct radix_tree_root *extent_radix,
118                                  struct extent_record *rec)
119 {
120         if (rec->checked && rec->extent_item_refs == rec->refs &&
121             rec->refs > 0) {
122                 radix_tree_delete(extent_radix, rec->start);
123                 free(rec);
124         }
125         return 0;
126 }
127
128 static int check_block(struct btrfs_root *root,
129                        struct radix_tree_root *extent_radix,
130                        struct btrfs_buffer *buf)
131 {
132         struct extent_record *rec;
133         int ret = 1;
134
135         rec = radix_tree_lookup(extent_radix, buf->blocknr);
136         if (!rec)
137                 return 1;
138         if (btrfs_is_leaf(&buf->node)) {
139                 ret = check_leaf(root, &rec->parent_key, &buf->leaf);
140         } else {
141                 ret = check_node(root, &rec->parent_key, &buf->node);
142         }
143         rec->checked = 1;
144         if (!ret)
145                 maybe_free_extent_rec(extent_radix, rec);
146         return ret;
147 }
148
149 static int add_extent_rec(struct radix_tree_root *extent_radix,
150                           struct btrfs_disk_key *parent_key,
151                           u64 ref, u64 start, u64 nr, u64 owner,
152                           u32 extent_item_refs, int inc_ref, int set_checked)
153 {
154         struct extent_record *rec;
155         int ret = 0;
156         rec = radix_tree_lookup(extent_radix, start);
157         if (rec) {
158                 if (inc_ref)
159                         rec->refs++;
160                 if (start != rec->start) {
161                         fprintf(stderr, "warning, start mismatch %llu %llu\n",
162                                 (unsigned long long)rec->start,
163                                 (unsigned long long)start);
164                         ret = 1;
165                 }
166                 if (extent_item_refs) {
167                         if (rec->extent_item_refs) {
168                                 fprintf(stderr, "block %llu rec extent_item_refs %u, passed %u\n", (unsigned long long)start, rec->extent_item_refs, extent_item_refs);
169                         }
170                         rec->extent_item_refs = extent_item_refs;
171                 }
172                 if (set_checked)
173                         rec->checked = 1;
174                 maybe_free_extent_rec(extent_radix, rec);
175                 return ret;
176         }
177         rec = malloc(sizeof(*rec));
178         if (start == 0)
179                 extent_item_refs = 0;
180         rec->start = start;
181         rec->nr = nr;
182         rec->owner = owner;
183         rec->checked = 0;
184
185         if (inc_ref)
186                 rec->refs = 1;
187         else
188                 rec->refs = 0;
189
190         if (extent_item_refs)
191                 rec->extent_item_refs = extent_item_refs;
192         else
193                 rec->extent_item_refs = 0;
194
195         if (parent_key)
196                 memcpy(&rec->parent_key, parent_key, sizeof(*parent_key));
197         else
198                 memset(&rec->parent_key, 0, sizeof(*parent_key));
199
200         ret = radix_tree_insert(extent_radix, start, rec);
201         BUG_ON(ret);
202         blocks_used += nr;
203         if (set_checked)
204                 rec->checked = 1;
205         return ret;
206 }
207
208 static int add_pending(struct radix_tree_root *pending,
209                        struct radix_tree_root *seen, u64 blocknr)
210 {
211         if (test_radix_bit(seen, blocknr))
212                 return -EEXIST;
213         set_radix_bit(pending, blocknr);
214         set_radix_bit(seen, blocknr);
215         return 0;
216 }
217 static int pick_next_pending(struct radix_tree_root *pending,
218                         struct radix_tree_root *reada,
219                         struct radix_tree_root *nodes,
220                         u64 last, unsigned long *bits, int bits_nr,
221                         int *reada_bits)
222 {
223         unsigned long node_start = last;
224         int ret;
225         ret = find_first_radix_bit(reada, bits, 0, 1);
226         if (ret) {
227                 *reada_bits = 1;
228                 return ret;
229         }
230         *reada_bits = 0;
231         if (node_start > 8)
232                 node_start -= 8;
233         ret = find_first_radix_bit(nodes, bits, node_start, bits_nr);
234         if (!ret)
235                 ret = find_first_radix_bit(nodes, bits, 0, bits_nr);
236         if (ret) {
237                 if (bits_nr - ret > 8) {
238                         int ret2;
239                         u64 sequential;
240                         ret2 = find_first_radix_bit(pending, bits + ret,
241                                                     bits[0], bits_nr - ret);
242                         sequential = bits[0];
243                         while(ret2 > 0) {
244                                 if (bits[ret] - sequential > 8)
245                                         break;
246                                 sequential = bits[ret];
247                                 ret++;
248                                 ret2--;
249                         }
250                 }
251                 return ret;
252         }
253         return find_first_radix_bit(pending, bits, 0, bits_nr);
254 }
255 static struct btrfs_buffer reada_buf;
256
257 static int run_next_block(struct btrfs_root *root,
258                           unsigned long *bits,
259                           int bits_nr,
260                           u64 *last,
261                           struct radix_tree_root *pending,
262                           struct radix_tree_root *seen,
263                           struct radix_tree_root *reada,
264                           struct radix_tree_root *nodes,
265                           struct radix_tree_root *extent_radix)
266 {
267         struct btrfs_buffer *buf;
268         u64 blocknr;
269         int ret;
270         int i;
271         int nritems;
272         struct btrfs_leaf *leaf;
273         struct btrfs_node *node;
274         struct btrfs_disk_key *disk_key;
275         int reada_bits;
276
277         u64 last_block = 0;
278         ret = pick_next_pending(pending, reada, nodes, *last, bits,
279                                 bits_nr, &reada_bits);
280         if (ret == 0) {
281                 return 1;
282         }
283         if (!reada_bits) {
284                 for(i = 0; i < ret; i++) {
285                         u64 offset;
286                         set_radix_bit(reada, bits[i]);
287                         btrfs_map_bh_to_logical(root, &reada_buf, bits[i]);
288                         offset = reada_buf.dev_blocknr * root->blocksize;
289                         last_block = bits[i];
290                         readahead(reada_buf.fd, offset, root->blocksize);
291                 }
292         }
293         *last = bits[0];
294         blocknr = bits[0];
295         clear_radix_bit(pending, blocknr);
296         clear_radix_bit(reada, blocknr);
297         clear_radix_bit(nodes, blocknr);
298         buf = read_tree_block(root, blocknr);
299         nritems = btrfs_header_nritems(&buf->node.header);
300         ret = check_block(root, extent_radix, buf);
301         if (ret) {
302                 fprintf(stderr, "bad block %llu\n",
303                         (unsigned long long)blocknr);
304         }
305         if (btrfs_is_leaf(&buf->node)) {
306                 leaf = &buf->leaf;
307                 btree_space_waste += btrfs_leaf_free_space(root, leaf);
308                 for (i = 0; i < nritems; i++) {
309                         struct btrfs_file_extent_item *fi;
310                         disk_key = &leaf->items[i].key;
311                         if (btrfs_disk_key_type(disk_key) ==
312                             BTRFS_EXTENT_ITEM_KEY) {
313                                 struct btrfs_key found;
314                                 struct btrfs_extent_item *ei;
315                                 btrfs_disk_key_to_cpu(&found,
316                                                       &leaf->items[i].key);
317                                 ei = btrfs_item_ptr(leaf, i,
318                                                     struct btrfs_extent_item);
319                                 add_extent_rec(extent_radix, NULL, 0,
320                                                found.objectid,
321                                                found.offset,
322                                                btrfs_extent_owner(ei),
323                                                btrfs_extent_refs(ei), 0, 0);
324                                 continue;
325                         }
326                         if (btrfs_disk_key_type(disk_key) ==
327                             BTRFS_CSUM_ITEM_KEY) {
328                                 total_csum_bytes +=
329                                         btrfs_item_size(leaf->items + i);
330                                 continue;
331                         }
332                         if (btrfs_disk_key_type(disk_key) ==
333                             BTRFS_BLOCK_GROUP_ITEM_KEY) {
334                                 struct btrfs_block_group_item *bi;
335                                 bi = btrfs_item_ptr(leaf, i,
336                                             struct btrfs_block_group_item);
337 #if 0
338                                 fprintf(stderr,"block group %Lu %Lu used %Lu ",
339                                         btrfs_disk_key_objectid(disk_key),
340                                         btrfs_disk_key_offset(disk_key),
341                                         btrfs_block_group_used(bi));
342                                 fprintf(stderr, "flags %x\n", bi->flags);
343 #endif
344                                 continue;
345                         }
346                         if (btrfs_disk_key_type(&leaf->items[i].key) !=
347                             BTRFS_EXTENT_DATA_KEY)
348                                 continue;
349                         fi = btrfs_item_ptr(leaf, i,
350                                             struct btrfs_file_extent_item);
351                         if (btrfs_file_extent_type(fi) !=
352                             BTRFS_FILE_EXTENT_REG)
353                                 continue;
354                         if (btrfs_file_extent_disk_blocknr(fi) == 0)
355                                 continue;
356                         ret = add_extent_rec(extent_radix, NULL, blocknr,
357                                    btrfs_file_extent_disk_blocknr(fi),
358                                    btrfs_file_extent_disk_num_blocks(fi),
359                                    btrfs_disk_key_objectid(&leaf->items[i].key),
360                                    0, 1, 1);
361                         BUG_ON(ret);
362                 }
363         } else {
364                 int level;
365                 node = &buf->node;
366                 level = btrfs_header_level(&node->header);
367                 for (i = 0; i < nritems; i++) {
368                         u64 ptr = btrfs_node_blockptr(node, i);
369                         ret = add_extent_rec(extent_radix,
370                                              &node->ptrs[i].key,
371                                              blocknr, ptr, 1,
372                                              btrfs_header_owner(&node->header),
373                                              0, 1, 0);
374                         BUG_ON(ret);
375                         if (level > 1) {
376                                 add_pending(nodes, seen, ptr);
377                         } else {
378                                 add_pending(pending, seen, ptr);
379                         }
380                 }
381                 btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(root) -
382                                       nritems) * sizeof(struct btrfs_key_ptr);
383         }
384         btrfs_block_release(root, buf);
385         total_btree_blocks++;
386         return 0;
387 }
388
389 static int add_root_to_pending(struct btrfs_buffer *buf,
390                                unsigned long *bits,
391                                int bits_nr,
392                                struct radix_tree_root *extent_radix,
393                                struct radix_tree_root *pending,
394                                struct radix_tree_root *seen,
395                                struct radix_tree_root *reada,
396                                struct radix_tree_root *nodes)
397 {
398         if (btrfs_header_level(&buf->node.header) > 0)
399                 add_pending(nodes, seen, buf->blocknr);
400         else
401                 add_pending(pending, seen, buf->blocknr);
402         add_extent_rec(extent_radix, NULL, 0, buf->blocknr, 1,
403                        btrfs_header_owner(&buf->node.header), 0, 1, 0);
404         return 0;
405 }
406
407 int check_extent_refs(struct btrfs_root *root,
408                       struct radix_tree_root *extent_radix)
409 {
410         struct extent_record *rec[64];
411         int i;
412         int ret;
413         int err = 0;
414
415         while(1) {
416                 ret = radix_tree_gang_lookup(extent_radix, (void **)rec, 0,
417                                              ARRAY_SIZE(rec));
418                 if (!ret)
419                         break;
420                 for (i = 0; i < ret; i++) {
421                         if (rec[i]->refs != rec[i]->extent_item_refs) {
422                                 fprintf(stderr, "ref mismatch on [%llu %llu] ",
423                                         (unsigned long long)rec[i]->start,
424                                         (unsigned long long)rec[i]->nr);
425                                 fprintf(stderr, "extent item %u, found %u\n",
426                                         rec[i]->extent_item_refs,
427                                         rec[i]->refs);
428                                 err = 1;
429                         }
430                         radix_tree_delete(extent_radix, rec[i]->start);
431                         free(rec[i]);
432                 }
433         }
434         return err;
435 }
436
437 int main(int ac, char **av) {
438         struct btrfs_super_block super;
439         struct btrfs_root *root;
440         struct radix_tree_root extent_radix;
441         struct radix_tree_root seen;
442         struct radix_tree_root pending;
443         struct radix_tree_root reada;
444         struct radix_tree_root nodes;
445         struct btrfs_path path;
446         struct btrfs_key key;
447         struct btrfs_key found_key;
448         int ret;
449         u64 last = 0;
450         unsigned long *bits;
451         int bits_nr;
452         struct btrfs_leaf *leaf;
453         int slot;
454         struct btrfs_root_item *ri;
455
456         radix_tree_init();
457
458
459         INIT_RADIX_TREE(&extent_radix, GFP_NOFS);
460         init_bit_radix(&seen);
461         init_bit_radix(&pending);
462         init_bit_radix(&reada);
463         init_bit_radix(&nodes);
464
465         root = open_ctree(av[1], &super);
466
467         bits_nr = 1024 * 1024 / root->blocksize;
468         bits = malloc(bits_nr * sizeof(unsigned long));
469         if (!bits) {
470                 perror("malloc");
471                 exit(1);
472         }
473
474         add_root_to_pending(root->fs_info->tree_root->node, bits, bits_nr,
475                             &extent_radix, &pending, &seen, &reada, &nodes);
476
477         btrfs_init_path(&path);
478         key.offset = 0;
479         key.objectid = 0;
480         key.flags = 0;
481         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
482         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
483                                         &key, &path, 0, 0);
484         BUG_ON(ret < 0);
485         while(1) {
486                 leaf = &path.nodes[0]->leaf;
487                 slot = path.slots[0];
488                 if (slot >= btrfs_header_nritems(&leaf->header)) {
489                         ret = btrfs_next_leaf(root, &path);
490                         if (ret != 0)
491                                 break;
492                         leaf = &path.nodes[0]->leaf;
493                         slot = path.slots[0];
494                 }
495                 btrfs_disk_key_to_cpu(&found_key,
496                                       &leaf->items[path.slots[0]].key);
497                 if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) {
498                         struct btrfs_buffer *buf;
499                         ri = btrfs_item_ptr(leaf, path.slots[0],
500                                             struct btrfs_root_item);
501                         buf = read_tree_block(root->fs_info->tree_root,
502                                               btrfs_root_blocknr(ri));
503                         add_root_to_pending(buf, bits, bits_nr, &extent_radix,
504                                             &pending, &seen, &reada, &nodes);
505                         btrfs_block_release(root->fs_info->tree_root, buf);
506                 }
507                 path.slots[0]++;
508         }
509         btrfs_release_path(root, &path);
510         while(1) {
511                 ret = run_next_block(root, bits, bits_nr, &last, &pending,
512                                      &seen, &reada, &nodes, &extent_radix);
513                 if (ret != 0)
514                         break;
515         }
516         ret = check_extent_refs(root, &extent_radix);
517         close_ctree(root, &super);
518         printf("found %llu blocks used err is %d\n",
519                (unsigned long long)blocks_used, ret);
520         printf("total csum bytes: %llu\n",(unsigned long long)total_csum_bytes);
521         printf("total tree blocks: %llu\n",
522                (unsigned long long)total_btree_blocks);
523         printf("btree space waste bytes: %llu\n",
524                (unsigned long long)btree_space_waste);
525         return ret;
526 }