Stop using radix trees to record pending allocations
[platform/upstream/btrfs-progs.git] / disk-io.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #define _XOPEN_SOURCE 600
20 #define __USE_XOPEN2K
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <sys/types.h>
24 #include <sys/stat.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include "kerncompat.h"
28 #include "radix-tree.h"
29 #include "ctree.h"
30 #include "disk-io.h"
31 #include "transaction.h"
32 #include "crc32c.h"
33
/* Count of tree block buffers that have been allocated and not yet freed. */
static int allocated_blocks;
/* free_some_buffers() starts evicting once the cache reaches this size. */
int cache_max = 10000;
36
37 int btrfs_map_bh_to_logical(struct btrfs_root *root, struct btrfs_buffer *bh,
38                              u64 logical)
39 {
40         bh->fd = root->fs_info->fp;
41         bh->dev_blocknr = logical;
42         return 0;
43 }
44
45 static int check_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf)
46 {
47         if (buf->blocknr != btrfs_header_blocknr(&buf->node.header))
48                 BUG();
49         if (memcmp(root->fs_info->disk_super->fsid, buf->node.header.fsid,
50                    sizeof(buf->node.header.fsid)))
51                 BUG();
52         return 0;
53 }
54
55 static int free_some_buffers(struct btrfs_root *root)
56 {
57         struct list_head *node, *next;
58         struct btrfs_buffer *b;
59         if (root->fs_info->cache_size < cache_max)
60                 return 0;
61         list_for_each_safe(node, next, &root->fs_info->cache) {
62                 b = list_entry(node, struct btrfs_buffer, cache);
63                 if (b->count == 1) {
64                         BUG_ON(!list_empty(&b->dirty));
65                         list_del_init(&b->cache);
66                         btrfs_block_release(root, b);
67                         if (root->fs_info->cache_size < cache_max)
68                                 break;
69                 }
70         }
71         return 0;
72 }
73
74 struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr)
75 {
76         struct btrfs_buffer *buf;
77         int ret;
78
79         buf = malloc(sizeof(struct btrfs_buffer) + root->sectorsize);
80         if (!buf)
81                 return buf;
82         allocated_blocks++;
83         buf->blocknr = blocknr;
84         buf->count = 2;
85         INIT_LIST_HEAD(&buf->dirty);
86         free_some_buffers(root);
87         radix_tree_preload(GFP_KERNEL);
88         ret = radix_tree_insert(&root->fs_info->cache_radix, blocknr, buf);
89         radix_tree_preload_end();
90         list_add_tail(&buf->cache, &root->fs_info->cache);
91         root->fs_info->cache_size++;
92         if (ret) {
93                 free(buf);
94                 return NULL;
95         }
96         return buf;
97 }
98
99 struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr)
100 {
101         struct btrfs_buffer *buf;
102         buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr);
103         if (buf) {
104                 buf->count++;
105         } else {
106                 buf = alloc_tree_block(root, blocknr);
107                 if (!buf) {
108                         BUG();
109                         return NULL;
110                 }
111         }
112         return buf;
113 }
114
115 struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr)
116 {
117         struct btrfs_buffer *buf;
118         int ret;
119         buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr);
120         if (buf) {
121                 buf->count++;
122                 if (check_tree_block(root, buf))
123                         BUG();
124         } else {
125                 buf = alloc_tree_block(root, blocknr);
126                 if (!buf)
127                         return NULL;
128                 btrfs_map_bh_to_logical(root, buf, blocknr);
129                 ret = pread(buf->fd, &buf->node, root->sectorsize,
130                             buf->dev_blocknr * root->sectorsize);
131                 if (ret != root->sectorsize) {
132                         free(buf);
133                         return NULL;
134                 }
135                 if (check_tree_block(root, buf))
136                         BUG();
137         }
138         return buf;
139 }
140
141 int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
142                      struct btrfs_buffer *buf)
143 {
144         if (!list_empty(&buf->dirty))
145                 return 0;
146         list_add_tail(&buf->dirty, &root->fs_info->trans);
147         buf->count++;
148         if (check_tree_block(root, buf))
149                 BUG();
150         return 0;
151 }
152
153 int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
154                      struct btrfs_buffer *buf)
155 {
156         if (!list_empty(&buf->dirty)) {
157                 list_del_init(&buf->dirty);
158                 btrfs_block_release(root, buf);
159         }
160         return 0;
161 }
162
163 int btrfs_csum_node(struct btrfs_root *root, struct btrfs_node *node)
164 {
165         u32 crc;
166         size_t len = root->sectorsize - BTRFS_CSUM_SIZE;
167
168         crc = crc32c(0, (char *)(node) + BTRFS_CSUM_SIZE, len);
169         memcpy(node->header.csum, &crc, BTRFS_CRC32_SIZE);
170         return 0;
171 }
172
173 int btrfs_csum_super(struct btrfs_root *root, struct btrfs_super_block *super)
174 {
175         u32 crc;
176         char block[root->sectorsize];
177         size_t len = root->sectorsize - BTRFS_CSUM_SIZE;
178
179         memset(block, 0, root->sectorsize);
180         memcpy(block, super, sizeof(*super));
181
182         crc = crc32c(0, block + BTRFS_CSUM_SIZE, len);
183         memcpy(super->csum, &crc, BTRFS_CRC32_SIZE);
184         return 0;
185 }
186
187 int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
188                      struct btrfs_buffer *buf)
189 {
190         int ret;
191
192         if (buf->blocknr != btrfs_header_blocknr(&buf->node.header))
193                 BUG();
194         btrfs_map_bh_to_logical(root, buf, buf->blocknr);
195         if (check_tree_block(root, buf))
196                 BUG();
197
198         btrfs_csum_node(root, &buf->node);
199
200         ret = pwrite(buf->fd, &buf->node, root->sectorsize,
201                      buf->dev_blocknr * root->sectorsize);
202         if (ret != root->sectorsize)
203                 return ret;
204         return 0;
205 }
206
207 static int __commit_transaction(struct btrfs_trans_handle *trans, struct
208                                 btrfs_root *root)
209 {
210         struct btrfs_buffer *b;
211         int ret = 0;
212         int wret;
213         while(!list_empty(&root->fs_info->trans)) {
214                 b = list_entry(root->fs_info->trans.next, struct btrfs_buffer,
215                                dirty);
216                 list_del_init(&b->dirty);
217                 wret = write_tree_block(trans, root, b);
218                 if (wret)
219                         ret = wret;
220                 btrfs_block_release(root, b);
221         }
222         return ret;
223 }
224
225 static int commit_tree_roots(struct btrfs_trans_handle *trans,
226                              struct btrfs_fs_info *fs_info)
227 {
228         int ret;
229         u64 old_extent_block;
230         struct btrfs_root *tree_root = fs_info->tree_root;
231         struct btrfs_root *extent_root = fs_info->extent_root;
232
233         btrfs_write_dirty_block_groups(trans, fs_info->extent_root);
234         while(1) {
235                 old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
236                 if (old_extent_block == extent_root->node->blocknr)
237                         break;
238                 btrfs_set_root_blocknr(&extent_root->root_item,
239                                        extent_root->node->blocknr);
240                 ret = btrfs_update_root(trans, tree_root,
241                                         &extent_root->root_key,
242                                         &extent_root->root_item);
243                 BUG_ON(ret);
244                 btrfs_write_dirty_block_groups(trans, fs_info->extent_root);
245         }
246         return 0;
247 }
248
249 int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct
250                              btrfs_root *root, struct btrfs_super_block *s)
251 {
252         int ret = 0;
253         struct btrfs_buffer *snap = root->commit_root;
254         struct btrfs_key snap_key;
255
256         if (root->commit_root == root->node)
257                 return 0;
258
259         memcpy(&snap_key, &root->root_key, sizeof(snap_key));
260         root->root_key.offset++;
261
262         btrfs_set_root_blocknr(&root->root_item, root->node->blocknr);
263         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
264                                 &root->root_key, &root->root_item);
265         BUG_ON(ret);
266
267         ret = commit_tree_roots(trans, root->fs_info);
268         BUG_ON(ret);
269
270         ret = __commit_transaction(trans, root);
271         BUG_ON(ret);
272
273         write_ctree_super(trans, root, s);
274         btrfs_finish_extent_commit(trans, root->fs_info->extent_root);
275         btrfs_finish_extent_commit(trans, root->fs_info->tree_root);
276
277         root->commit_root = root->node;
278         root->node->count++;
279         ret = btrfs_drop_snapshot(trans, root, snap);
280         BUG_ON(ret);
281
282         ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key);
283         BUG_ON(ret);
284         root->fs_info->generation = root->root_key.offset + 1;
285
286         return ret;
287 }
288
289 static int __setup_root(struct btrfs_super_block *super,
290                         struct btrfs_root *root,
291                         struct btrfs_fs_info *fs_info,
292                         u64 objectid, int fp)
293 {
294         root->node = NULL;
295         root->commit_root = NULL;
296         root->sectorsize = btrfs_super_sectorsize(super);
297         root->nodesize = btrfs_super_nodesize(super);
298         root->leafsize = btrfs_super_leafsize(super);
299         root->ref_cows = 0;
300         root->fs_info = fs_info;
301         memset(&root->root_key, 0, sizeof(root->root_key));
302         memset(&root->root_item, 0, sizeof(root->root_item));
303         root->root_key.objectid = objectid;
304         return 0;
305 }
306
307 static int find_and_setup_root(struct btrfs_super_block *super,
308                                struct btrfs_root *tree_root,
309                                struct btrfs_fs_info *fs_info,
310                                u64 objectid,
311                                struct btrfs_root *root, int fp)
312 {
313         int ret;
314
315         __setup_root(super, root, fs_info, objectid, fp);
316         ret = btrfs_find_last_root(tree_root, objectid,
317                                    &root->root_item, &root->root_key);
318         BUG_ON(ret);
319
320         root->node = read_tree_block(root,
321                                      btrfs_root_blocknr(&root->root_item));
322         BUG_ON(!root->node);
323         return 0;
324 }
325
/*
 * Open (creating it if needed, mode 0600) the file @filename and build the
 * filesystem state on top of it via open_ctree_fd().
 *
 * Returns the fs root, or NULL if the file cannot be opened or the
 * filesystem cannot be set up.  In the latter case the descriptor opened
 * here is closed again, since open_ctree_fd() does not close it and no one
 * else holds it.
 */
struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super)
{
        struct btrfs_root *root;
        int fp;

        fp = open(filename, O_CREAT | O_RDWR, 0600);
        if (fp < 0) {
                return NULL;
        }
        root = open_ctree_fd(fp, super);
        if (!root)
                close(fp);
        return root;
}
336
337 struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super)
338 {
339         struct btrfs_root *root = malloc(sizeof(struct btrfs_root));
340         struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root));
341         struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root));
342         struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info));
343         int ret;
344
345         INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL);
346         INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL);
347         INIT_LIST_HEAD(&fs_info->trans);
348         INIT_LIST_HEAD(&fs_info->cache);
349         pending_tree_init(&fs_info->pending_tree);
350         pending_tree_init(&fs_info->pinned_tree);
351         pending_tree_init(&fs_info->del_pending);
352         fs_info->cache_size = 0;
353         fs_info->fp = fp;
354         fs_info->running_transaction = NULL;
355         fs_info->fs_root = root;
356         fs_info->tree_root = tree_root;
357         fs_info->extent_root = extent_root;
358         fs_info->last_inode_alloc = 0;
359         fs_info->last_inode_alloc_dirid = 0;
360         fs_info->disk_super = super;
361         memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert));
362
363         ret = pread(fp, super, sizeof(struct btrfs_super_block),
364                      BTRFS_SUPER_INFO_OFFSET);
365         if (ret == 0 || btrfs_super_root(super) == 0) {
366                 BUG();
367                 return NULL;
368         }
369         BUG_ON(ret < 0);
370
371         __setup_root(super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID, fp);
372         tree_root->node = read_tree_block(tree_root, btrfs_super_root(super));
373         BUG_ON(!tree_root->node);
374
375         ret = find_and_setup_root(super, tree_root, fs_info,
376                                   BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp);
377         BUG_ON(ret);
378
379         ret = find_and_setup_root(super, tree_root, fs_info,
380                                   BTRFS_FS_TREE_OBJECTID, root, fp);
381         BUG_ON(ret);
382
383         root->commit_root = root->node;
384         root->node->count++;
385         root->ref_cows = 1;
386         root->fs_info->generation = root->root_key.offset + 1;
387         btrfs_read_block_groups(root);
388         return root;
389 }
390
391 int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
392                       *root, struct btrfs_super_block *s)
393 {
394         int ret;
395
396         btrfs_set_super_root(s, root->fs_info->tree_root->node->blocknr);
397         btrfs_csum_super(root, s);
398
399         ret = pwrite(root->fs_info->fp, s, sizeof(*s),
400                      BTRFS_SUPER_INFO_OFFSET);
401         if (ret != sizeof(*s)) {
402                 fprintf(stderr, "failed to write new super block err %d\n", ret);
403                 return ret;
404         }
405         return 0;
406 }
407
408 static int drop_cache(struct btrfs_root *root)
409 {
410         while(!list_empty(&root->fs_info->cache)) {
411                 struct btrfs_buffer *b = list_entry(root->fs_info->cache.next,
412                                                     struct btrfs_buffer,
413                                                     cache);
414                 list_del_init(&b->cache);
415                 btrfs_block_release(root, b);
416         }
417         return 0;
418 }
419
420 int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s)
421 {
422         int ret;
423         struct btrfs_trans_handle *trans;
424
425         trans = root->fs_info->running_transaction;
426         btrfs_commit_transaction(trans, root, s);
427         ret = commit_tree_roots(trans, root->fs_info);
428         BUG_ON(ret);
429         ret = __commit_transaction(trans, root);
430         BUG_ON(ret);
431         write_ctree_super(trans, root, s);
432         drop_cache(root);
433         BUG_ON(!list_empty(&root->fs_info->trans));
434
435         btrfs_free_block_groups(root->fs_info);
436         close(root->fs_info->fp);
437         if (root->node)
438                 btrfs_block_release(root, root->node);
439         if (root->fs_info->extent_root->node)
440                 btrfs_block_release(root->fs_info->extent_root,
441                                     root->fs_info->extent_root->node);
442         if (root->fs_info->tree_root->node)
443                 btrfs_block_release(root->fs_info->tree_root,
444                                     root->fs_info->tree_root->node);
445         btrfs_block_release(root, root->commit_root);
446         free(root);
447         printf("on close %d blocks are allocated\n", allocated_blocks);
448         return 0;
449 }
450
451 void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf)
452 {
453         buf->count--;
454         if (buf->count < 0)
455                 BUG();
456         if (buf->count == 0) {
457                 BUG_ON(!list_empty(&buf->cache));
458                 BUG_ON(!list_empty(&buf->dirty));
459                 if (!radix_tree_lookup(&root->fs_info->cache_radix,
460                                        buf->blocknr))
461                         BUG();
462                 radix_tree_delete(&root->fs_info->cache_radix, buf->blocknr);
463                 memset(buf, 0, sizeof(*buf));
464                 free(buf);
465                 BUG_ON(allocated_blocks == 0);
466                 allocated_blocks--;
467                 BUG_ON(root->fs_info->cache_size == 0);
468                 root->fs_info->cache_size--;
469         }
470 }
471