2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public
4 * License v2 as published by the Free Software Foundation.
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 * General Public License for more details.
11 * You should have received a copy of the GNU General Public
12 * License along with this program; if not, write to the
13 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
14 * Boston, MA 021110-1307, USA.
21 #include "transaction.h"
24 #include "check/mode-common.h"
27 * Check if the inode referenced by the given data reference uses the extent
28 * at disk_bytenr as a non-prealloc extent.
30 * Returns 1 if true, 0 if false and < 0 on error.
32 static int check_prealloc_data_ref(struct btrfs_fs_info *fs_info,
34 struct btrfs_extent_data_ref *dref,
35 struct extent_buffer *eb)
37 u64 rootid = btrfs_extent_data_ref_root(eb, dref);
38 u64 objectid = btrfs_extent_data_ref_objectid(eb, dref);
39 u64 offset = btrfs_extent_data_ref_offset(eb, dref);
40 struct btrfs_root *root;
42 struct btrfs_path path;
45 btrfs_init_path(&path);
46 key.objectid = rootid;
47 key.type = BTRFS_ROOT_ITEM_KEY;
49 root = btrfs_read_fs_root(fs_info, &key);
55 key.objectid = objectid;
56 key.type = BTRFS_EXTENT_DATA_KEY;
58 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
61 "Missing file extent item for inode %llu, root %llu, offset %llu",
62 objectid, rootid, offset);
69 struct btrfs_file_extent_item *fi;
72 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
73 ret = btrfs_next_leaf(root, &path);
80 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
81 if (key.objectid != objectid ||
82 key.type != BTRFS_EXTENT_DATA_KEY)
85 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
86 struct btrfs_file_extent_item);
87 extent_type = btrfs_file_extent_type(path.nodes[0], fi);
88 if (extent_type != BTRFS_FILE_EXTENT_REG &&
89 extent_type != BTRFS_FILE_EXTENT_PREALLOC)
92 if (btrfs_file_extent_disk_bytenr(path.nodes[0], fi) !=
96 if (extent_type == BTRFS_FILE_EXTENT_REG) {
105 btrfs_release_path(&path);
110 * Check if a shared data reference points to a node that has a file extent item
111 * pointing to the extent at @disk_bytenr that is not of type prealloc.
113 * Returns 1 if true, 0 if false and < 0 on error.
115 static int check_prealloc_shared_data_ref(struct btrfs_fs_info *fs_info,
116 u64 parent, u64 disk_bytenr)
118 struct extent_buffer *eb;
123 eb = read_tree_block(fs_info, parent, 0);
124 if (!extent_buffer_uptodate(eb)) {
129 nr = btrfs_header_nritems(eb);
130 for (i = 0; i < nr; i++) {
131 struct btrfs_key key;
132 struct btrfs_file_extent_item *fi;
135 btrfs_item_key_to_cpu(eb, &key, i);
136 if (key.type != BTRFS_EXTENT_DATA_KEY)
139 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
140 extent_type = btrfs_file_extent_type(eb, fi);
141 if (extent_type != BTRFS_FILE_EXTENT_REG &&
142 extent_type != BTRFS_FILE_EXTENT_PREALLOC)
145 if (btrfs_file_extent_disk_bytenr(eb, fi) == disk_bytenr &&
146 extent_type == BTRFS_FILE_EXTENT_REG) {
152 free_extent_buffer(eb);
157 * Check if a prealloc extent is shared by multiple inodes and if any inode has
158 * already written to that extent. This is to avoid emitting invalid warnings
159 * about odd csum items (a inode has an extent entirely marked as prealloc
160 * but another inode shares it and has already written to it).
162 * Note: right now it does not check if the number of checksum items in the
163 * csum tree matches the number of bytes written into the ex-prealloc extent.
164 * It's complex to deal with that because the prealloc extent might have been
165 * partially written through multiple inodes and we would have to keep track of
166 * ranges, merging them and notice ranges that fully or partially overlap, to
167 * avoid false reports of csum items missing for areas of the prealloc extent
168 * that were not written to - for example if we have a 1M prealloc extent, we
169 * can have only the first half of it written, but 2 different inodes refer to
170 * the its first half (through reflinks/cloning), so keeping a counter of bytes
171 * covered by checksum items is not enough, as the correct value would be 512K
172 * and not 1M (whence the need to track ranges).
174 * Returns 0 if the prealloc extent was not written yet by any inode, 1 if
175 * at least one other inode has written to it, and < 0 on error.
177 int check_prealloc_extent_written(struct btrfs_fs_info *fs_info,
178 u64 disk_bytenr, u64 num_bytes)
180 struct btrfs_path path;
181 struct btrfs_key key;
183 struct btrfs_extent_item *ei;
188 key.objectid = disk_bytenr;
189 key.type = BTRFS_EXTENT_ITEM_KEY;
190 key.offset = num_bytes;
192 btrfs_init_path(&path);
193 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
196 "Missing extent item in extent tree for disk_bytenr %llu, num_bytes %llu\n",
197 disk_bytenr, num_bytes);
203 /* First check all inline refs. */
204 ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
205 struct btrfs_extent_item);
206 item_size = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
207 ptr = (unsigned long)(ei + 1);
208 end = (unsigned long)ei + item_size;
210 struct btrfs_extent_inline_ref *iref;
213 iref = (struct btrfs_extent_inline_ref *)ptr;
214 type = btrfs_extent_inline_ref_type(path.nodes[0], iref);
215 ASSERT(type == BTRFS_EXTENT_DATA_REF_KEY ||
216 type == BTRFS_SHARED_DATA_REF_KEY);
218 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
219 struct btrfs_extent_data_ref *dref;
221 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
222 ret = check_prealloc_data_ref(fs_info, disk_bytenr,
223 dref, path.nodes[0]);
226 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
229 parent = btrfs_extent_inline_ref_offset(path.nodes[0],
231 ret = check_prealloc_shared_data_ref(fs_info,
238 ptr += btrfs_extent_inline_ref_size(type);
241 /* Now check if there are any non-inlined refs. */
244 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
245 ret = btrfs_next_leaf(fs_info->extent_root, &path);
254 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
255 if (key.objectid != disk_bytenr)
258 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
259 struct btrfs_extent_data_ref *dref;
261 dref = btrfs_item_ptr(path.nodes[0], path.slots[0],
262 struct btrfs_extent_data_ref);
263 ret = check_prealloc_data_ref(fs_info, disk_bytenr,
264 dref, path.nodes[0]);
267 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
268 ret = check_prealloc_shared_data_ref(fs_info,
278 btrfs_release_path(&path);
283 * Search in csum tree to find how many bytes of range [@start, @start + @len)
284 * has the corresponding csum item.
286 * @start: range start
288 * @found: return value of found csum bytes
291 int count_csum_range(struct btrfs_fs_info *fs_info, u64 start,
294 struct btrfs_key key;
295 struct btrfs_path path;
296 struct extent_buffer *leaf;
301 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
303 btrfs_init_path(&path);
305 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
307 key.type = BTRFS_EXTENT_CSUM_KEY;
309 ret = btrfs_search_slot(NULL, fs_info->csum_root,
313 if (ret > 0 && path.slots[0] > 0) {
314 leaf = path.nodes[0];
315 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
316 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
317 key.type == BTRFS_EXTENT_CSUM_KEY)
322 leaf = path.nodes[0];
323 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
324 ret = btrfs_next_leaf(fs_info->csum_root, &path);
329 leaf = path.nodes[0];
332 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
333 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
334 key.type != BTRFS_EXTENT_CSUM_KEY)
337 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
338 if (key.offset >= start + len)
341 if (key.offset > start)
344 size = btrfs_item_size_nr(leaf, path.slots[0]);
345 csum_end = key.offset + (size / csum_size) *
347 if (csum_end > start) {
348 size = min(csum_end - start, len);
357 btrfs_release_path(&path);
364 * Wrapper to insert one inode item into given @root
365 * Timestamp will be set to current time.
367 * @root: the root to insert inode item into
370 * @nbytes: nbytes (real used size, without hole)
371 * @nlink: number of links
372 * @mode: file mode, including S_IF* bits
374 int insert_inode_item(struct btrfs_trans_handle *trans,
375 struct btrfs_root *root, u64 ino, u64 size,
376 u64 nbytes, u64 nlink, u32 mode)
378 struct btrfs_inode_item ii;
379 time_t now = time(NULL);
382 btrfs_set_stack_inode_size(&ii, size);
383 btrfs_set_stack_inode_nbytes(&ii, nbytes);
384 btrfs_set_stack_inode_nlink(&ii, nlink);
385 btrfs_set_stack_inode_mode(&ii, mode);
386 btrfs_set_stack_inode_generation(&ii, trans->transid);
387 btrfs_set_stack_timespec_nsec(&ii.atime, 0);
388 btrfs_set_stack_timespec_sec(&ii.ctime, now);
389 btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
390 btrfs_set_stack_timespec_sec(&ii.mtime, now);
391 btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
392 btrfs_set_stack_timespec_sec(&ii.otime, 0);
393 btrfs_set_stack_timespec_nsec(&ii.otime, 0);
395 ret = btrfs_insert_inode(trans, root, ino, &ii);
398 warning("root %llu inode %llu recreating inode item, this may "
399 "be incomplete, please check permissions and content after "
400 "the fsck completes.\n", (unsigned long long)root->objectid,
401 (unsigned long long)ino);
406 static int get_highest_inode(struct btrfs_trans_handle *trans,
407 struct btrfs_root *root, struct btrfs_path *path,
410 struct btrfs_key key, found_key;
413 btrfs_init_path(path);
414 key.objectid = BTRFS_LAST_FREE_OBJECTID;
416 key.type = BTRFS_INODE_ITEM_KEY;
417 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
419 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
421 *highest_ino = found_key.objectid;
424 if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
426 btrfs_release_path(path);
431 * Link inode to dir 'lost+found'. Increase @ref_count.
433 * Returns 0 means success.
434 * Returns <0 means failure.
436 int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
437 struct btrfs_root *root,
438 struct btrfs_path *path,
439 u64 ino, char *namebuf, u32 name_len,
440 u8 filetype, u64 *ref_count)
442 char *dir_name = "lost+found";
447 btrfs_release_path(path);
448 ret = get_highest_inode(trans, root, path, &lost_found_ino);
453 ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
454 BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
457 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
460 ret = btrfs_add_link(trans, root, ino, lost_found_ino,
461 namebuf, name_len, filetype, NULL, 1, 0);
463 * Add ".INO" suffix several times to handle case where
464 * "FILENAME.INO" is already taken by another file.
466 while (ret == -EEXIST) {
468 * Conflicting file name, add ".INO" as suffix * +1 for '.'
470 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
474 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
476 name_len += count_digits(ino) + 1;
477 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
478 name_len, filetype, NULL, 1, 0);
481 error("failed to link the inode %llu to %s dir: %s",
482 ino, dir_name, strerror(-ret));
487 printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
488 name_len, namebuf, dir_name);
490 btrfs_release_path(path);
492 error("failed to move file '%.*s' to '%s' dir", name_len,
498 * Extra (optional) check for dev_item size to report possbile problem on a new
501 void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
503 if (!IS_ALIGNED(total_bytes, sectorsize)) {
505 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
506 devid, total_bytes, sectorsize);
508 "this is OK for older kernel, but may cause kernel warning for newer kernels");
509 warning("this can be fixed by 'btrfs rescue fix-device-size'");
513 void reada_walk_down(struct btrfs_root *root, struct extent_buffer *node,
516 struct btrfs_fs_info *fs_info = root->fs_info;
523 level = btrfs_header_level(node);
527 nritems = btrfs_header_nritems(node);
528 for (i = slot; i < nritems; i++) {
529 bytenr = btrfs_node_blockptr(node, i);
530 ptr_gen = btrfs_node_ptr_generation(node, i);
531 readahead_tree_block(fs_info, bytenr, ptr_gen);
536 * Check the child node/leaf by the following condition:
537 * 1. the first item key of the node/leaf should be the same with the one
539 * 2. block in parent node should match the child node/leaf.
540 * 3. generation of parent node and child's header should be consistent.
542 * Or the child node/leaf pointed by the key in parent is not valid.
544 * We hope to check leaf owner too, but since subvol may share leaves,
545 * which makes leaf owner check not so strong, key check should be
546 * sufficient enough for that case.
548 int check_child_node(struct extent_buffer *parent, int slot,
549 struct extent_buffer *child)
551 struct btrfs_key parent_key;
552 struct btrfs_key child_key;
555 btrfs_node_key_to_cpu(parent, &parent_key, slot);
556 if (btrfs_header_level(child) == 0)
557 btrfs_item_key_to_cpu(child, &child_key, 0);
559 btrfs_node_key_to_cpu(child, &child_key, 0);
561 if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
564 "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
565 parent_key.objectid, parent_key.type, parent_key.offset,
566 child_key.objectid, child_key.type, child_key.offset);
568 if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
570 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
571 btrfs_node_blockptr(parent, slot),
572 btrfs_header_bytenr(child));
574 if (btrfs_node_ptr_generation(parent, slot) !=
575 btrfs_header_generation(child)) {
577 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
578 btrfs_header_generation(child),
579 btrfs_node_ptr_generation(parent, slot));
584 void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
586 struct btrfs_block_group_cache *cache;
591 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
592 &start, &end, EXTENT_DIRTY);
595 clear_extent_dirty(&fs_info->free_space_cache, start, end);
600 cache = btrfs_lookup_first_block_group(fs_info, start);
605 start = cache->key.objectid + cache->key.offset;