start of block group code
[platform/upstream/btrfs-progs.git] / disk-io.c
1 #define _XOPEN_SOURCE 600
2 #define __USE_XOPEN2K
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <sys/types.h>
6 #include <sys/stat.h>
7 #include <fcntl.h>
8 #include <unistd.h>
9 #include "kerncompat.h"
10 #include "radix-tree.h"
11 #include "ctree.h"
12 #include "disk-io.h"
13 #include "transaction.h"
14
15 static int allocated_blocks = 0;
16 int cache_max = 10000;
17
18 struct dev_lookup {
19         u64 block_start;
20         u64 num_blocks;
21         u64 device_id;
22         int fd;
23 };
24
25 int btrfs_insert_dev_radix(struct btrfs_root *root,
26                            int fd,
27                            u64 device_id,
28                            u64 block_start,
29                            u64 num_blocks)
30 {
31         struct dev_lookup *lookup;
32         int ret;
33
34         lookup = malloc(sizeof(*lookup));
35         if (!lookup)
36                 return -ENOMEM;
37         lookup->block_start = block_start;
38         lookup->num_blocks = num_blocks;
39         lookup->fd = fd;
40         lookup->device_id = device_id;
41 printf("inserting into dev radix %Lu %Lu\n", block_start, num_blocks);
42
43         ret = radix_tree_insert(&root->fs_info->dev_radix, block_start +
44                                 num_blocks - 1, lookup);
45         return ret;
46 }
47
48 int btrfs_map_bh_to_logical(struct btrfs_root *root, struct btrfs_buffer *bh,
49                              u64 logical)
50 {
51         struct dev_lookup *lookup[2];
52
53         int ret;
54
55         root = root->fs_info->dev_root;
56         ret = radix_tree_gang_lookup(&root->fs_info->dev_radix,
57                                      (void **)lookup,
58                                      (unsigned long)logical,
59                                      ARRAY_SIZE(lookup));
60         if (ret == 0 || lookup[0]->block_start > logical ||
61             lookup[0]->block_start + lookup[0]->num_blocks <= logical) {
62                 ret = -1;
63                 goto out;
64         }
65         bh->fd = lookup[0]->fd;
66         bh->dev_blocknr = logical - lookup[0]->block_start;
67         ret = 0;
68 out:
69         return ret;
70 }
71
72 static int check_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf)
73 {
74         if (buf->blocknr != btrfs_header_blocknr(&buf->node.header))
75                 BUG();
76         if (memcmp(root->fs_info->disk_super->fsid, buf->node.header.fsid,
77                    sizeof(buf->node.header.fsid)))
78                 BUG();
79         return 0;
80 }
81
82 static int free_some_buffers(struct btrfs_root *root)
83 {
84         struct list_head *node, *next;
85         struct btrfs_buffer *b;
86         if (root->fs_info->cache_size < cache_max)
87                 return 0;
88         list_for_each_safe(node, next, &root->fs_info->cache) {
89                 b = list_entry(node, struct btrfs_buffer, cache);
90                 if (b->count == 1) {
91                         BUG_ON(!list_empty(&b->dirty));
92                         list_del_init(&b->cache);
93                         btrfs_block_release(root, b);
94                         if (root->fs_info->cache_size < cache_max)
95                                 break;
96                 }
97         }
98         return 0;
99 }
100
101 struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr)
102 {
103         struct btrfs_buffer *buf;
104         int ret;
105
106         buf = malloc(sizeof(struct btrfs_buffer) + root->blocksize);
107         if (!buf)
108                 return buf;
109         allocated_blocks++;
110         buf->blocknr = blocknr;
111         buf->count = 2;
112         INIT_LIST_HEAD(&buf->dirty);
113         free_some_buffers(root);
114         radix_tree_preload(GFP_KERNEL);
115         ret = radix_tree_insert(&root->fs_info->cache_radix, blocknr, buf);
116         radix_tree_preload_end();
117         list_add_tail(&buf->cache, &root->fs_info->cache);
118         root->fs_info->cache_size++;
119         if (ret) {
120                 free(buf);
121                 return NULL;
122         }
123         return buf;
124 }
125
126 struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr)
127 {
128         struct btrfs_buffer *buf;
129         buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr);
130         if (buf) {
131                 buf->count++;
132         } else {
133                 buf = alloc_tree_block(root, blocknr);
134                 if (!buf) {
135                         BUG();
136                         return NULL;
137                 }
138         }
139         return buf;
140 }
141
142 struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr)
143 {
144         struct btrfs_buffer *buf;
145         int ret;
146         buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr);
147         if (buf) {
148                 buf->count++;
149                 if (check_tree_block(root, buf))
150                         BUG();
151         } else {
152                 buf = alloc_tree_block(root, blocknr);
153                 if (!buf)
154                         return NULL;
155                 btrfs_map_bh_to_logical(root, buf, blocknr);
156                 ret = pread(buf->fd, &buf->node, root->blocksize,
157                             buf->dev_blocknr * root->blocksize);
158                 if (ret != root->blocksize) {
159                         free(buf);
160                         return NULL;
161                 }
162                 if (check_tree_block(root, buf))
163                         BUG();
164         }
165         return buf;
166 }
167
168 int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
169                      struct btrfs_buffer *buf)
170 {
171         if (!list_empty(&buf->dirty))
172                 return 0;
173         list_add_tail(&buf->dirty, &root->fs_info->trans);
174         buf->count++;
175         if (check_tree_block(root, buf))
176                 BUG();
177         return 0;
178 }
179
180 int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
181                      struct btrfs_buffer *buf)
182 {
183         if (!list_empty(&buf->dirty)) {
184                 list_del_init(&buf->dirty);
185                 btrfs_block_release(root, buf);
186         }
187         return 0;
188 }
189
190 int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
191                      struct btrfs_buffer *buf)
192 {
193         int ret;
194
195         if (buf->blocknr != btrfs_header_blocknr(&buf->node.header))
196                 BUG();
197         btrfs_map_bh_to_logical(root, buf, buf->blocknr);
198         if (check_tree_block(root, buf))
199                 BUG();
200         ret = pwrite(buf->fd, &buf->node, root->blocksize,
201                      buf->dev_blocknr * root->blocksize);
202         if (ret != root->blocksize)
203                 return ret;
204         return 0;
205 }
206
207 static int __commit_transaction(struct btrfs_trans_handle *trans, struct
208                                 btrfs_root *root)
209 {
210         struct btrfs_buffer *b;
211         int ret = 0;
212         int wret;
213         while(!list_empty(&root->fs_info->trans)) {
214                 b = list_entry(root->fs_info->trans.next, struct btrfs_buffer,
215                                dirty);
216                 list_del_init(&b->dirty);
217                 wret = write_tree_block(trans, root, b);
218                 if (wret)
219                         ret = wret;
220                 btrfs_block_release(root, b);
221         }
222         return ret;
223 }
224
225 static int commit_tree_roots(struct btrfs_trans_handle *trans,
226                              struct btrfs_fs_info *fs_info)
227 {
228         int ret;
229         u64 old_extent_block;
230         struct btrfs_root *tree_root = fs_info->tree_root;
231         struct btrfs_root *extent_root = fs_info->extent_root;
232
233         if (btrfs_super_device_root(fs_info->disk_super) !=
234             fs_info->dev_root->node->blocknr) {
235                 btrfs_set_super_device_root(fs_info->disk_super,
236                                             fs_info->dev_root->node->blocknr);
237         }
238         btrfs_write_dirty_block_groups(trans, fs_info->extent_root);
239         while(1) {
240                 old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
241                 if (old_extent_block == extent_root->node->blocknr)
242                         break;
243                 btrfs_set_root_blocknr(&extent_root->root_item,
244                                        extent_root->node->blocknr);
245                 ret = btrfs_update_root(trans, tree_root,
246                                         &extent_root->root_key,
247                                         &extent_root->root_item);
248                 BUG_ON(ret);
249                 btrfs_write_dirty_block_groups(trans, fs_info->extent_root);
250         }
251         return 0;
252 }
253
254 int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct
255                              btrfs_root *root, struct btrfs_super_block *s)
256 {
257         int ret = 0;
258         struct btrfs_buffer *snap = root->commit_root;
259         struct btrfs_key snap_key;
260
261         if (root->commit_root == root->node)
262                 return 0;
263
264         memcpy(&snap_key, &root->root_key, sizeof(snap_key));
265         root->root_key.offset++;
266
267         btrfs_set_root_blocknr(&root->root_item, root->node->blocknr);
268         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
269                                 &root->root_key, &root->root_item);
270         BUG_ON(ret);
271
272         ret = commit_tree_roots(trans, root->fs_info);
273         BUG_ON(ret);
274
275         ret = __commit_transaction(trans, root);
276         BUG_ON(ret);
277
278         write_ctree_super(trans, root, s);
279         btrfs_finish_extent_commit(trans, root->fs_info->extent_root);
280         btrfs_finish_extent_commit(trans, root->fs_info->tree_root);
281
282         root->commit_root = root->node;
283         root->node->count++;
284         ret = btrfs_drop_snapshot(trans, root, snap);
285         BUG_ON(ret);
286
287         ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key);
288         BUG_ON(ret);
289         root->fs_info->generation = root->root_key.offset + 1;
290
291         return ret;
292 }
293
294 static int __setup_root(struct btrfs_super_block *super,
295                         struct btrfs_root *root,
296                         struct btrfs_fs_info *fs_info,
297                         u64 objectid, int fp)
298 {
299         root->node = NULL;
300         root->commit_root = NULL;
301         root->blocksize = btrfs_super_blocksize(super);
302         root->ref_cows = 0;
303         root->fs_info = fs_info;
304         memset(&root->root_key, 0, sizeof(root->root_key));
305         memset(&root->root_item, 0, sizeof(root->root_item));
306         root->root_key.objectid = objectid;
307         return 0;
308 }
309
310 static int find_and_setup_root(struct btrfs_super_block *super,
311                                struct btrfs_root *tree_root,
312                                struct btrfs_fs_info *fs_info,
313                                u64 objectid,
314                                struct btrfs_root *root, int fp)
315 {
316         int ret;
317
318         __setup_root(super, root, fs_info, objectid, fp);
319         ret = btrfs_find_last_root(tree_root, objectid,
320                                    &root->root_item, &root->root_key);
321         BUG_ON(ret);
322
323         root->node = read_tree_block(root,
324                                      btrfs_root_blocknr(&root->root_item));
325         BUG_ON(!root->node);
326         return 0;
327 }
328
329 int btrfs_open_disk(struct btrfs_root *root, u64 device_id,
330                     u64 block_start, u64 num_blocks,
331                     char *filename, int name_len)
332 {
333         char *null_filename;
334         int fd;
335         int ret;
336
337         null_filename = malloc(name_len + 1);
338         if (!null_filename)
339                 return -ENOMEM;
340         memcpy(null_filename, filename, name_len);
341         null_filename[name_len] = '\0';
342
343         fd = open(null_filename, O_RDWR);
344         if (fd < 0) {
345                 ret = -1;
346                 goto out;
347         }
348
349         posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM);
350         posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE);
351         ret = btrfs_insert_dev_radix(root, fd, device_id,
352                                      block_start, num_blocks);
353         BUG_ON(ret);
354         ret = 0;
355 out:
356         free(null_filename);
357         return ret;
358 }
359
360 static int read_device_info(struct btrfs_root *root)
361 {
362         struct btrfs_path path;
363         int ret;
364         struct btrfs_key key;
365         struct btrfs_leaf *leaf;
366         struct btrfs_device_item *dev_item;
367         int nritems;
368         int slot;
369
370         root = root->fs_info->dev_root;
371
372         btrfs_init_path(&path);
373         key.objectid = 0;
374         key.offset = 0;
375         key.flags = 0;
376         btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
377
378         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
379         leaf = &path.nodes[0]->leaf;
380         nritems = btrfs_header_nritems(&leaf->header);
381         while(1) {
382                 slot = path.slots[0];
383                 if (slot >= nritems) {
384                         ret = btrfs_next_leaf(root, &path);
385                         if (ret)
386                                 break;
387                         leaf = &path.nodes[0]->leaf;
388                         nritems = btrfs_header_nritems(&leaf->header);
389                         slot = path.slots[0];
390                 }
391                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
392                 if (btrfs_key_type(&key) != BTRFS_DEV_ITEM_KEY) {
393                         path.slots[0]++;
394                         continue;
395                 }
396                 dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_device_item);
397                 if (btrfs_device_id(dev_item) !=
398                     btrfs_super_device_id(root->fs_info->disk_super)) {
399 printf("found key %Lu %Lu\n", key.objectid, key.offset);
400                         ret = btrfs_open_disk(root, btrfs_device_id(dev_item),
401                                               key.objectid, key.offset,
402                                               (char *)(dev_item + 1),
403                                               btrfs_device_pathlen(dev_item));
404                         BUG_ON(ret);
405                 }
406                 path.slots[0]++;
407         }
408         btrfs_release_path(root, &path);
409         return 0;
410 }
411
412 struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super)
413 {
414         int fp;
415
416         fp = open(filename, O_CREAT | O_RDWR, 0600);
417         if (fp < 0) {
418                 return NULL;
419         }
420         return open_ctree_fd(fp, super);
421 }
422
423 struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super)
424 {
425         struct btrfs_root *root = malloc(sizeof(struct btrfs_root));
426         struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root));
427         struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root));
428         struct btrfs_root *dev_root = malloc(sizeof(struct btrfs_root));
429         struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info));
430         struct dev_lookup *dev_lookup;
431         int ret;
432
433         INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL);
434         INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL);
435         INIT_RADIX_TREE(&fs_info->dev_radix, GFP_KERNEL);
436         INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL);
437         INIT_LIST_HEAD(&fs_info->trans);
438         INIT_LIST_HEAD(&fs_info->cache);
439         fs_info->cache_size = 0;
440         fs_info->fp = fp;
441         fs_info->running_transaction = NULL;
442         fs_info->fs_root = root;
443         fs_info->tree_root = tree_root;
444         fs_info->extent_root = extent_root;
445         fs_info->dev_root = dev_root;
446         fs_info->last_inode_alloc = 0;
447         fs_info->last_inode_alloc_dirid = 0;
448         fs_info->disk_super = super;
449         memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert));
450         memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert));
451
452         ret = pread(fp, super, sizeof(struct btrfs_super_block),
453                      BTRFS_SUPER_INFO_OFFSET);
454         if (ret == 0 || btrfs_super_root(super) == 0) {
455                 BUG();
456                 return NULL;
457         }
458         BUG_ON(ret < 0);
459         __setup_root(super, dev_root, fs_info, BTRFS_DEV_TREE_OBJECTID, fp);
460
461         dev_lookup = malloc(sizeof(*dev_lookup));
462         dev_lookup->fd = fp;
463         dev_lookup->device_id = btrfs_super_device_id(super);
464         dev_lookup->block_start = btrfs_super_device_block_start(super);
465         dev_lookup->num_blocks = btrfs_super_device_num_blocks(super);
466         ret = radix_tree_insert(&fs_info->dev_radix,
467                                 dev_lookup->block_start +
468                                 dev_lookup->num_blocks - 1, dev_lookup);
469         BUG_ON(ret);
470
471         dev_root->node = read_tree_block(dev_root,
472                                          btrfs_super_device_root(super));
473
474         ret = read_device_info(dev_root);
475         BUG_ON(ret);
476
477         __setup_root(super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID, fp);
478         tree_root->node = read_tree_block(tree_root, btrfs_super_root(super));
479         BUG_ON(!tree_root->node);
480
481         ret = find_and_setup_root(super, tree_root, fs_info,
482                                   BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp);
483         BUG_ON(ret);
484
485         ret = find_and_setup_root(super, tree_root, fs_info,
486                                   BTRFS_FS_TREE_OBJECTID, root, fp);
487         BUG_ON(ret);
488
489         root->commit_root = root->node;
490         root->node->count++;
491         root->ref_cows = 1;
492         root->fs_info->generation = root->root_key.offset + 1;
493         btrfs_read_block_groups(root);
494         return root;
495 }
496
497 int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
498                       *root, struct btrfs_super_block *s)
499 {
500         int ret;
501         btrfs_set_super_root(s, root->fs_info->tree_root->node->blocknr);
502         ret = pwrite(root->fs_info->fp, s, sizeof(*s),
503                      BTRFS_SUPER_INFO_OFFSET);
504         if (ret != sizeof(*s)) {
505                 fprintf(stderr, "failed to write new super block err %d\n", ret);
506                 return ret;
507         }
508         return 0;
509 }
510
511 static int drop_cache(struct btrfs_root *root)
512 {
513         while(!list_empty(&root->fs_info->cache)) {
514                 struct btrfs_buffer *b = list_entry(root->fs_info->cache.next,
515                                                     struct btrfs_buffer,
516                                                     cache);
517                 list_del_init(&b->cache);
518                 btrfs_block_release(root, b);
519         }
520         return 0;
521 }
522
523 static int free_dev_radix(struct btrfs_fs_info *fs_info)
524 {
525         struct dev_lookup *lookup[8];
526         int ret;
527         int i;
528         while(1) {
529                 ret = radix_tree_gang_lookup(&fs_info->dev_radix,
530                                              (void **)lookup, 0,
531                                              ARRAY_SIZE(lookup));
532                 if (!ret)
533                         break;
534                 for (i = 0; i < ret; i++) {
535                         if (lookup[i]->device_id !=
536                             btrfs_super_device_id(fs_info->disk_super))
537                                 close(lookup[i]->fd);
538                         radix_tree_delete(&fs_info->dev_radix,
539                                           lookup[i]->block_start +
540                                           lookup[i]->num_blocks - 1);
541                         free(lookup[i]);
542                 }
543         }
544         return 0;
545 }
546
547 int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s)
548 {
549         int ret;
550         struct btrfs_trans_handle *trans;
551
552         trans = root->fs_info->running_transaction;
553         btrfs_commit_transaction(trans, root, s);
554         ret = commit_tree_roots(trans, root->fs_info);
555         BUG_ON(ret);
556         ret = __commit_transaction(trans, root);
557         BUG_ON(ret);
558         write_ctree_super(trans, root, s);
559         drop_cache(root);
560         BUG_ON(!list_empty(&root->fs_info->trans));
561
562         free_dev_radix(root->fs_info);
563         btrfs_free_block_groups(root->fs_info);
564         close(root->fs_info->fp);
565         if (root->node)
566                 btrfs_block_release(root, root->node);
567         if (root->fs_info->extent_root->node)
568                 btrfs_block_release(root->fs_info->extent_root,
569                                     root->fs_info->extent_root->node);
570         if (root->fs_info->tree_root->node)
571                 btrfs_block_release(root->fs_info->tree_root,
572                                     root->fs_info->tree_root->node);
573         if (root->fs_info->dev_root->node)
574                 btrfs_block_release(root->fs_info->dev_root,
575                                     root->fs_info->dev_root->node);
576         btrfs_block_release(root, root->commit_root);
577         free(root);
578         printf("on close %d blocks are allocated\n", allocated_blocks);
579         return 0;
580 }
581
582 void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf)
583 {
584         buf->count--;
585         if (buf->count < 0)
586                 BUG();
587         if (buf->count == 0) {
588                 BUG_ON(!list_empty(&buf->cache));
589                 BUG_ON(!list_empty(&buf->dirty));
590                 if (!radix_tree_lookup(&root->fs_info->cache_radix,
591                                        buf->blocknr))
592                         BUG();
593                 radix_tree_delete(&root->fs_info->cache_radix, buf->blocknr);
594                 memset(buf, 0, sizeof(*buf));
595                 free(buf);
596                 BUG_ON(allocated_blocks == 0);
597                 allocated_blocks--;
598                 BUG_ON(root->fs_info->cache_size == 0);
599                 root->fs_info->cache_size--;
600         }
601 }
602