faster btrfsck
[platform/upstream/btrfs-progs.git] / disk-io.c
1 #define _XOPEN_SOURCE 600
2 #define __USE_XOPEN2K
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <sys/types.h>
6 #include <sys/stat.h>
7 #include <fcntl.h>
8 #include <unistd.h>
9 #include "kerncompat.h"
10 #include "radix-tree.h"
11 #include "ctree.h"
12 #include "disk-io.h"
13 #include "transaction.h"
14
15 static int allocated_blocks = 0;
16 int cache_max = 10000;
17
18 struct dev_lookup {
19         u64 block_start;
20         u64 num_blocks;
21         u64 device_id;
22         int fd;
23 };
24
25 int btrfs_insert_dev_radix(struct btrfs_root *root,
26                            int fd,
27                            u64 device_id,
28                            u64 block_start,
29                            u64 num_blocks)
30 {
31         struct dev_lookup *lookup;
32         int ret;
33
34         lookup = malloc(sizeof(*lookup));
35         if (!lookup)
36                 return -ENOMEM;
37         lookup->block_start = block_start;
38         lookup->num_blocks = num_blocks;
39         lookup->fd = fd;
40         lookup->device_id = device_id;
41 printf("inserting into dev radix %Lu %Lu\n", block_start, num_blocks);
42
43         ret = radix_tree_insert(&root->fs_info->dev_radix, block_start +
44                                 num_blocks - 1, lookup);
45         return ret;
46 }
47
48 int btrfs_map_bh_to_logical(struct btrfs_root *root, struct btrfs_buffer *bh,
49                              u64 logical)
50 {
51         struct dev_lookup *lookup[2];
52
53         int ret;
54
55         root = root->fs_info->dev_root;
56         ret = radix_tree_gang_lookup(&root->fs_info->dev_radix,
57                                      (void **)lookup,
58                                      (unsigned long)logical,
59                                      ARRAY_SIZE(lookup));
60         if (ret == 0 || lookup[0]->block_start > logical ||
61             lookup[0]->block_start + lookup[0]->num_blocks <= logical) {
62                 ret = -1;
63                 goto out;
64         }
65         bh->fd = lookup[0]->fd;
66         bh->dev_blocknr = logical - lookup[0]->block_start;
67         ret = 0;
68 out:
69         return ret;
70 }
71
72 static int check_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf)
73 {
74         if (buf->blocknr != btrfs_header_blocknr(&buf->node.header))
75                 BUG();
76         if (memcmp(root->fs_info->disk_super->fsid, buf->node.header.fsid,
77                    sizeof(buf->node.header.fsid)))
78                 BUG();
79         return 0;
80 }
81
82 static int free_some_buffers(struct btrfs_root *root)
83 {
84         struct list_head *node, *next;
85         struct btrfs_buffer *b;
86         if (root->fs_info->cache_size < cache_max)
87                 return 0;
88         list_for_each_safe(node, next, &root->fs_info->cache) {
89                 b = list_entry(node, struct btrfs_buffer, cache);
90                 if (b->count == 1) {
91                         BUG_ON(!list_empty(&b->dirty));
92                         list_del_init(&b->cache);
93                         btrfs_block_release(root, b);
94                         if (root->fs_info->cache_size < cache_max)
95                                 break;
96                 }
97         }
98         return 0;
99 }
100
101 struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr)
102 {
103         struct btrfs_buffer *buf;
104         int ret;
105
106         buf = malloc(sizeof(struct btrfs_buffer) + root->blocksize);
107         if (!buf)
108                 return buf;
109         allocated_blocks++;
110         buf->blocknr = blocknr;
111         buf->count = 2;
112         INIT_LIST_HEAD(&buf->dirty);
113         free_some_buffers(root);
114         radix_tree_preload(GFP_KERNEL);
115         ret = radix_tree_insert(&root->fs_info->cache_radix, blocknr, buf);
116         radix_tree_preload_end();
117         list_add_tail(&buf->cache, &root->fs_info->cache);
118         root->fs_info->cache_size++;
119         if (ret) {
120                 free(buf);
121                 return NULL;
122         }
123         return buf;
124 }
125
126 struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr)
127 {
128         struct btrfs_buffer *buf;
129         buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr);
130         if (buf) {
131                 buf->count++;
132         } else {
133                 buf = alloc_tree_block(root, blocknr);
134                 if (!buf) {
135                         BUG();
136                         return NULL;
137                 }
138         }
139         return buf;
140 }
141
142 struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr)
143 {
144         struct btrfs_buffer *buf;
145         int ret;
146         buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr);
147         if (buf) {
148                 buf->count++;
149         } else {
150                 buf = alloc_tree_block(root, blocknr);
151                 if (!buf)
152                         return NULL;
153                 btrfs_map_bh_to_logical(root, buf, blocknr);
154                 ret = pread(buf->fd, &buf->node, root->blocksize,
155                             buf->dev_blocknr * root->blocksize);
156                 if (ret != root->blocksize) {
157                         free(buf);
158                         return NULL;
159                 }
160         }
161         if (check_tree_block(root, buf))
162                 BUG();
163         return buf;
164 }
165
166 int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
167                      struct btrfs_buffer *buf)
168 {
169         if (!list_empty(&buf->dirty))
170                 return 0;
171         list_add_tail(&buf->dirty, &root->fs_info->trans);
172         buf->count++;
173         return 0;
174 }
175
176 int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
177                      struct btrfs_buffer *buf)
178 {
179         if (!list_empty(&buf->dirty)) {
180                 list_del_init(&buf->dirty);
181                 btrfs_block_release(root, buf);
182         }
183         return 0;
184 }
185
186 int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
187                      struct btrfs_buffer *buf)
188 {
189         int ret;
190
191         if (buf->blocknr != btrfs_header_blocknr(&buf->node.header))
192                 BUG();
193         btrfs_map_bh_to_logical(root, buf, buf->blocknr);
194         ret = pwrite(buf->fd, &buf->node, root->blocksize,
195                      buf->dev_blocknr * root->blocksize);
196         if (ret != root->blocksize)
197                 return ret;
198         return 0;
199 }
200
201 static int __commit_transaction(struct btrfs_trans_handle *trans, struct
202                                 btrfs_root *root)
203 {
204         struct btrfs_buffer *b;
205         int ret = 0;
206         int wret;
207         while(!list_empty(&root->fs_info->trans)) {
208                 b = list_entry(root->fs_info->trans.next, struct btrfs_buffer,
209                                dirty);
210                 list_del_init(&b->dirty);
211                 wret = write_tree_block(trans, root, b);
212                 if (wret)
213                         ret = wret;
214                 btrfs_block_release(root, b);
215         }
216         return ret;
217 }
218
219 static int commit_tree_roots(struct btrfs_trans_handle *trans,
220                              struct btrfs_fs_info *fs_info)
221 {
222         int ret;
223         u64 old_extent_block;
224         struct btrfs_root *tree_root = fs_info->tree_root;
225         struct btrfs_root *extent_root = fs_info->extent_root;
226
227         if (btrfs_super_device_root(fs_info->disk_super) !=
228             fs_info->dev_root->node->blocknr) {
229                 btrfs_set_super_device_root(fs_info->disk_super,
230                                             fs_info->dev_root->node->blocknr);
231         }
232         while(1) {
233                 old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
234                 if (old_extent_block == extent_root->node->blocknr)
235                         break;
236                 btrfs_set_root_blocknr(&extent_root->root_item,
237                                        extent_root->node->blocknr);
238                 ret = btrfs_update_root(trans, tree_root,
239                                         &extent_root->root_key,
240                                         &extent_root->root_item);
241                 BUG_ON(ret);
242         }
243         return 0;
244 }
245
246 int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct
247                              btrfs_root *root, struct btrfs_super_block *s)
248 {
249         int ret = 0;
250         struct btrfs_buffer *snap = root->commit_root;
251         struct btrfs_key snap_key;
252
253         if (root->commit_root == root->node)
254                 return 0;
255
256         memcpy(&snap_key, &root->root_key, sizeof(snap_key));
257         root->root_key.offset++;
258
259         btrfs_set_root_blocknr(&root->root_item, root->node->blocknr);
260         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
261                                 &root->root_key, &root->root_item);
262         BUG_ON(ret);
263
264         ret = commit_tree_roots(trans, root->fs_info);
265         BUG_ON(ret);
266
267         ret = __commit_transaction(trans, root);
268         BUG_ON(ret);
269
270         write_ctree_super(trans, root, s);
271         btrfs_finish_extent_commit(trans, root->fs_info->extent_root);
272         btrfs_finish_extent_commit(trans, root->fs_info->tree_root);
273
274         root->commit_root = root->node;
275         root->node->count++;
276         ret = btrfs_drop_snapshot(trans, root, snap);
277         BUG_ON(ret);
278
279         ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key);
280         BUG_ON(ret);
281         root->fs_info->generation = root->root_key.offset + 1;
282
283         return ret;
284 }
285
286 static int __setup_root(struct btrfs_super_block *super,
287                         struct btrfs_root *root,
288                         struct btrfs_fs_info *fs_info,
289                         u64 objectid, int fp)
290 {
291         root->node = NULL;
292         root->commit_root = NULL;
293         root->blocksize = btrfs_super_blocksize(super);
294         root->ref_cows = 0;
295         root->fs_info = fs_info;
296         memset(&root->root_key, 0, sizeof(root->root_key));
297         memset(&root->root_item, 0, sizeof(root->root_item));
298         root->root_key.objectid = objectid;
299         return 0;
300 }
301
302 static int find_and_setup_root(struct btrfs_super_block *super,
303                                struct btrfs_root *tree_root,
304                                struct btrfs_fs_info *fs_info,
305                                u64 objectid,
306                                struct btrfs_root *root, int fp)
307 {
308         int ret;
309
310         __setup_root(super, root, fs_info, objectid, fp);
311         ret = btrfs_find_last_root(tree_root, objectid,
312                                    &root->root_item, &root->root_key);
313         BUG_ON(ret);
314
315         root->node = read_tree_block(root,
316                                      btrfs_root_blocknr(&root->root_item));
317         BUG_ON(!root->node);
318         return 0;
319 }
320
321 int btrfs_open_disk(struct btrfs_root *root, u64 device_id,
322                     u64 block_start, u64 num_blocks,
323                     char *filename, int name_len)
324 {
325         char *null_filename;
326         int fd;
327         int ret;
328
329         null_filename = malloc(name_len + 1);
330         if (!null_filename)
331                 return -ENOMEM;
332         memcpy(null_filename, filename, name_len);
333         null_filename[name_len] = '\0';
334
335         fd = open(null_filename, O_RDWR);
336         if (fd < 0) {
337                 ret = -1;
338                 goto out;
339         }
340
341         posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);
342         posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE);
343         ret = btrfs_insert_dev_radix(root, fd, device_id,
344                                      block_start, num_blocks);
345         BUG_ON(ret);
346         ret = 0;
347 out:
348         free(null_filename);
349         return ret;
350 }
351
352 static int read_device_info(struct btrfs_root *root)
353 {
354         struct btrfs_path path;
355         int ret;
356         struct btrfs_key key;
357         struct btrfs_leaf *leaf;
358         struct btrfs_device_item *dev_item;
359         int nritems;
360         int slot;
361
362         root = root->fs_info->dev_root;
363
364         btrfs_init_path(&path);
365         key.objectid = 0;
366         key.offset = 0;
367         key.flags = 0;
368         btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
369
370         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
371         leaf = &path.nodes[0]->leaf;
372         nritems = btrfs_header_nritems(&leaf->header);
373         while(1) {
374                 slot = path.slots[0];
375                 if (slot >= nritems) {
376                         ret = btrfs_next_leaf(root, &path);
377                         if (ret)
378                                 break;
379                         leaf = &path.nodes[0]->leaf;
380                         nritems = btrfs_header_nritems(&leaf->header);
381                         slot = path.slots[0];
382                 }
383                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
384                 if (btrfs_key_type(&key) != BTRFS_DEV_ITEM_KEY) {
385                         path.slots[0]++;
386                         continue;
387                 }
388                 dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_device_item);
389                 if (btrfs_device_id(dev_item) !=
390                     btrfs_super_device_id(root->fs_info->disk_super)) {
391 printf("found key %Lu %Lu\n", key.objectid, key.offset);
392                         ret = btrfs_open_disk(root, btrfs_device_id(dev_item),
393                                               key.objectid, key.offset,
394                                               (char *)(dev_item + 1),
395                                               btrfs_device_pathlen(dev_item));
396                         BUG_ON(ret);
397                 }
398                 path.slots[0]++;
399         }
400         btrfs_release_path(root, &path);
401         return 0;
402 }
403
404 struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super)
405 {
406         int fp;
407
408         fp = open(filename, O_CREAT | O_RDWR, 0600);
409         if (fp < 0) {
410                 return NULL;
411         }
412         return open_ctree_fd(fp, super);
413 }
414
415 struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super)
416 {
417         struct btrfs_root *root = malloc(sizeof(struct btrfs_root));
418         struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root));
419         struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root));
420         struct btrfs_root *dev_root = malloc(sizeof(struct btrfs_root));
421         struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info));
422         struct dev_lookup *dev_lookup;
423         int ret;
424
425         INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL);
426         INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL);
427         INIT_RADIX_TREE(&fs_info->dev_radix, GFP_KERNEL);
428         INIT_LIST_HEAD(&fs_info->trans);
429         INIT_LIST_HEAD(&fs_info->cache);
430         fs_info->cache_size = 0;
431         fs_info->fp = fp;
432         fs_info->running_transaction = NULL;
433         fs_info->fs_root = root;
434         fs_info->tree_root = tree_root;
435         fs_info->extent_root = extent_root;
436         fs_info->dev_root = dev_root;
437         fs_info->last_inode_alloc = 0;
438         fs_info->last_inode_alloc_dirid = 0;
439         fs_info->disk_super = super;
440         memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert));
441         memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert));
442
443         ret = pread(fp, super, sizeof(struct btrfs_super_block),
444                      BTRFS_SUPER_INFO_OFFSET);
445         if (ret == 0 || btrfs_super_root(super) == 0) {
446                 BUG();
447                 return NULL;
448         }
449         BUG_ON(ret < 0);
450         __setup_root(super, dev_root, fs_info, BTRFS_DEV_TREE_OBJECTID, fp);
451
452         dev_lookup = malloc(sizeof(*dev_lookup));
453         dev_lookup->fd = fp;
454         dev_lookup->device_id = btrfs_super_device_id(super);
455         dev_lookup->block_start = btrfs_super_device_block_start(super);
456         dev_lookup->num_blocks = btrfs_super_device_num_blocks(super);
457         ret = radix_tree_insert(&fs_info->dev_radix,
458                                 dev_lookup->block_start +
459                                 dev_lookup->num_blocks - 1, dev_lookup);
460         BUG_ON(ret);
461
462         dev_root->node = read_tree_block(dev_root,
463                                          btrfs_super_device_root(super));
464
465         ret = read_device_info(dev_root);
466         BUG_ON(ret);
467
468         __setup_root(super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID, fp);
469         tree_root->node = read_tree_block(tree_root, btrfs_super_root(super));
470         BUG_ON(!tree_root->node);
471
472         ret = find_and_setup_root(super, tree_root, fs_info,
473                                   BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp);
474         BUG_ON(ret);
475
476         ret = find_and_setup_root(super, tree_root, fs_info,
477                                   BTRFS_FS_TREE_OBJECTID, root, fp);
478         BUG_ON(ret);
479
480         root->commit_root = root->node;
481         root->node->count++;
482         root->ref_cows = 1;
483         root->fs_info->generation = root->root_key.offset + 1;
484         return root;
485 }
486
487 int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
488                       *root, struct btrfs_super_block *s)
489 {
490         int ret;
491         btrfs_set_super_root(s, root->fs_info->tree_root->node->blocknr);
492         ret = pwrite(root->fs_info->fp, s, sizeof(*s),
493                      BTRFS_SUPER_INFO_OFFSET);
494         if (ret != sizeof(*s)) {
495                 fprintf(stderr, "failed to write new super block err %d\n", ret);
496                 return ret;
497         }
498         return 0;
499 }
500
501 static int drop_cache(struct btrfs_root *root)
502 {
503         while(!list_empty(&root->fs_info->cache)) {
504                 struct btrfs_buffer *b = list_entry(root->fs_info->cache.next,
505                                                     struct btrfs_buffer,
506                                                     cache);
507                 list_del_init(&b->cache);
508                 btrfs_block_release(root, b);
509         }
510         return 0;
511 }
512
513 static int free_dev_radix(struct btrfs_fs_info *fs_info)
514 {
515         struct dev_lookup *lookup[8];
516         int ret;
517         int i;
518         while(1) {
519                 ret = radix_tree_gang_lookup(&fs_info->dev_radix,
520                                              (void **)lookup, 0,
521                                              ARRAY_SIZE(lookup));
522                 if (!ret)
523                         break;
524                 for (i = 0; i < ret; i++) {
525                         if (lookup[i]->device_id !=
526                             btrfs_super_device_id(fs_info->disk_super))
527                                 close(lookup[i]->fd);
528                         radix_tree_delete(&fs_info->dev_radix,
529                                           lookup[i]->block_start +
530                                           lookup[i]->num_blocks - 1);
531                         free(lookup[i]);
532                 }
533         }
534         return 0;
535 }
536
537 int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s)
538 {
539         int ret;
540         struct btrfs_trans_handle *trans;
541
542         trans = root->fs_info->running_transaction;
543         btrfs_commit_transaction(trans, root, s);
544         ret = commit_tree_roots(trans, root->fs_info);
545         BUG_ON(ret);
546         ret = __commit_transaction(trans, root);
547         BUG_ON(ret);
548         write_ctree_super(trans, root, s);
549         drop_cache(root);
550         BUG_ON(!list_empty(&root->fs_info->trans));
551
552         free_dev_radix(root->fs_info);
553         close(root->fs_info->fp);
554         if (root->node)
555                 btrfs_block_release(root, root->node);
556         if (root->fs_info->extent_root->node)
557                 btrfs_block_release(root->fs_info->extent_root,
558                                     root->fs_info->extent_root->node);
559         if (root->fs_info->tree_root->node)
560                 btrfs_block_release(root->fs_info->tree_root,
561                                     root->fs_info->tree_root->node);
562         if (root->fs_info->dev_root->node)
563                 btrfs_block_release(root->fs_info->dev_root,
564                                     root->fs_info->dev_root->node);
565         btrfs_block_release(root, root->commit_root);
566         free(root);
567         printf("on close %d blocks are allocated\n", allocated_blocks);
568         return 0;
569 }
570
571 void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf)
572 {
573         buf->count--;
574         if (buf->count < 0)
575                 BUG();
576         if (buf->count == 0) {
577                 BUG_ON(!list_empty(&buf->cache));
578                 BUG_ON(!list_empty(&buf->dirty));
579                 if (!radix_tree_lookup(&root->fs_info->cache_radix,
580                                        buf->blocknr))
581                         BUG();
582                 radix_tree_delete(&root->fs_info->cache_radix, buf->blocknr);
583                 memset(buf, 0, sizeof(*buf));
584                 free(buf);
585                 BUG_ON(allocated_blocks == 0);
586                 allocated_blocks--;
587                 BUG_ON(root->fs_info->cache_size == 0);
588                 root->fs_info->cache_size--;
589         }
590 }
591