1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (C) 2007 Oracle. All rights reserved.
9 #include "inode-item.h"
11 #include "transaction.h"
12 #include "print-tree.h"
13 #include "space-info.h"
14 #include "accessors.h"
15 #include "extent-tree.h"
16 #include "file-item.h"
/*
 * Scan the inode refs packed into the item at @slot of @leaf for one whose
 * name equals @name. Each entry is a struct btrfs_inode_ref header followed
 * directly by its name bytes, so the walk advances by header size plus name
 * length. ptr and name_ptr are values derived from btrfs_item_ptr_offset()
 * and are only handed to the extent buffer accessors.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the matching ref is returned, or NULL when none matches - confirm.
 */
18 struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
20 const struct fscrypt_str *name)
22 struct btrfs_inode_ref *ref;
24 unsigned long name_ptr;
29 item_size = btrfs_item_size(leaf, slot);
30 ptr = btrfs_item_ptr_offset(leaf, slot);
31 while (cur_offset < item_size) {
32 ref = (struct btrfs_inode_ref *)(ptr + cur_offset);
33 len = btrfs_inode_ref_name_len(leaf, ref);
/* The name is stored immediately after the fixed-size ref header. */
34 name_ptr = (unsigned long)(ref + 1);
/* Move the cursor to the next packed entry before the name comparison below. */
35 cur_offset += len + sizeof(*ref);
38 if (memcmp_extent_buffer(leaf, name->name, name_ptr,
/*
 * Scan the extended inode refs packed into the item at @slot of @leaf for an
 * entry that matches on all three of: name length, parent objectid
 * (@ref_objectid) and name bytes (@name).
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the matching extref is returned, or NULL otherwise - confirm.
 */
45 struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
46 struct extent_buffer *leaf, int slot, u64 ref_objectid,
47 const struct fscrypt_str *name)
49 struct btrfs_inode_extref *extref;
51 unsigned long name_ptr;
56 item_size = btrfs_item_size(leaf, slot);
57 ptr = btrfs_item_ptr_offset(leaf, slot);
60 * Search all extended backrefs in this item. We're only
61 * looking through any collisions so most of the time this is
62 * just going to compare against one buffer. If all is well,
63 * we'll return success and the inode ref object.
65 while (cur_offset < item_size) {
66 extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
/* Unlike struct btrfs_inode_ref, the name is a member of the extref itself. */
67 name_ptr = (unsigned long)(&extref->name);
68 ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
/* Cheap length and parent checks come first; the byte compare runs last. */
70 if (ref_name_len == name->len &&
71 btrfs_inode_extref_parent(leaf, extref) == ref_objectid &&
72 (memcmp_extent_buffer(leaf, name->name, name_ptr,
/* No match: skip this entry (header plus its name) and keep looking. */
76 cur_offset += ref_name_len + sizeof(*extref);
/*
 * Look up the extended inode ref for @name under parent @ref_objectid.
 *
 * The search key is (@inode_objectid, BTRFS_INODE_EXTREF_KEY,
 * btrfs_extref_hash(@ref_objectid, name)). After btrfs_search_slot() the leaf
 * and slot left in @path are scanned with btrfs_find_name_in_ext_backref().
 * @ins_len and cow are forwarded unchanged to btrfs_search_slot().
 *
 * NOTE(review): the handling of the btrfs_search_slot() return value sits on
 * lines not visible in this excerpt - confirm the error/not-found paths.
 */
81 /* Returns NULL if no extref found */
82 struct btrfs_inode_extref *
83 btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
84 struct btrfs_root *root,
85 struct btrfs_path *path,
86 const struct fscrypt_str *name,
87 u64 inode_objectid, u64 ref_objectid, int ins_len,
93 key.objectid = inode_objectid;
94 key.type = BTRFS_INODE_EXTREF_KEY;
/* Extrefs are keyed by a hash of the parent objectid and the name. */
95 key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);
97 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
102 return btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
/*
 * Remove the extended inode ref for @name under parent @ref_objectid from the
 * inode @inode_objectid in @root.
 *
 * The item is located by its hash key, the exact extref is found inside it,
 * and its stored index is written to *index. If the extref is the only entry
 * in the item, the whole item is deleted; otherwise the trailing entries are
 * moved down over it and the item is shrunk by del_len bytes.
 *
 * The path is allocated and freed locally.
 *
 * NOTE(review): error checks after btrfs_alloc_path(), btrfs_search_slot() and
 * the extref lookup, and any guard around the *index store, are on lines not
 * visible in this excerpt.
 */
107 static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
108 struct btrfs_root *root,
109 const struct fscrypt_str *name,
110 u64 inode_objectid, u64 ref_objectid,
113 struct btrfs_path *path;
114 struct btrfs_key key;
115 struct btrfs_inode_extref *extref;
116 struct extent_buffer *leaf;
/* Bytes occupied by the entry being removed: header plus name. */
118 int del_len = name->len + sizeof(*extref);
120 unsigned long item_start;
123 key.objectid = inode_objectid;
124 key.type = BTRFS_INODE_EXTREF_KEY;
125 key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);
127 path = btrfs_alloc_path();
/* ins_len of -1 and cow of 1: search with the intent to delete. */
131 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
138 * Sanity check - did we find the right item for this name?
139 * This should always succeed so error here will make the FS
142 extref = btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
145 btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL);
150 leaf = path->nodes[0];
151 item_size = btrfs_item_size(leaf, path->slots[0]);
153 *index = btrfs_inode_extref_index(leaf, extref);
/* The entry spans the entire item, so drop the item rather than edit it. */
155 if (del_len == item_size) {
157 * Common case only one ref in the item, remove the
160 ret = btrfs_del_item(trans, root, path);
/*
 * Several entries share the item: slide everything after the victim down
 * by del_len, then truncate the item by the same amount.
 */
164 ptr = (unsigned long)extref;
165 item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
167 memmove_extent_buffer(leaf, ptr, ptr + del_len,
168 item_size - (ptr + del_len - item_start));
170 btrfs_truncate_item(path, item_size - del_len, 1);
173 btrfs_free_path(path);
/*
 * Remove the inode ref for @name under parent @ref_objectid from the inode
 * @inode_objectid in @root, writing the ref's stored index to *index.
 *
 * The regular ref item, keyed (@inode_objectid, BTRFS_INODE_REF_KEY,
 * @ref_objectid), is tried first. When search_ext_refs is set the function
 * hands over to btrfs_del_inode_extref() after releasing its own path.
 *
 * NOTE(review): the places where search_ext_refs is set, and the error checks
 * after the search and lookup calls, are on lines not visible in this excerpt.
 */
178 int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
179 struct btrfs_root *root, const struct fscrypt_str *name,
180 u64 inode_objectid, u64 ref_objectid, u64 *index)
182 struct btrfs_path *path;
183 struct btrfs_key key;
184 struct btrfs_inode_ref *ref;
185 struct extent_buffer *leaf;
187 unsigned long item_start;
191 int search_ext_refs = 0;
192 int del_len = name->len + sizeof(*ref);
194 key.objectid = inode_objectid;
195 key.offset = ref_objectid;
196 key.type = BTRFS_INODE_REF_KEY;
198 path = btrfs_alloc_path();
202 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
207 } else if (ret < 0) {
211 ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], name);
217 leaf = path->nodes[0];
218 item_size = btrfs_item_size(leaf, path->slots[0]);
221 *index = btrfs_inode_ref_index(leaf, ref);
/* The ref is the only entry in the item: delete the whole item. */
223 if (del_len == item_size) {
224 ret = btrfs_del_item(trans, root, path);
/*
 * Otherwise close the gap left by this ref and shrink the item.
 * NOTE(review): sub_item_len is computed with the same expression as
 * del_len above, so the two always hold the same value.
 */
227 ptr = (unsigned long)ref;
228 sub_item_len = name->len + sizeof(*ref);
229 item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
230 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
231 item_size - (ptr + sub_item_len - item_start));
232 btrfs_truncate_item(path, item_size - sub_item_len, 1);
/* The path is released before the extref fallback, which allocates its own. */
234 btrfs_free_path(path);
236 if (search_ext_refs) {
238 * No refs were found, or we could not find the
239 * name in our ref array. Find and remove the extended
242 return btrfs_del_inode_extref(trans, root, name,
243 inode_objectid, ref_objectid, index);
250 * btrfs_insert_inode_extref() - Inserts an extended inode ref into a tree.
252 * The caller must have checked against BTRFS_LINK_MAX already.
/*
 * Insert an extended inode ref for @name under parent @ref_objectid.
 *
 * An empty item keyed (@inode_objectid, BTRFS_INODE_EXTREF_KEY, hash) is
 * requested. If an item with that key already exists, it is checked for the
 * same name and then extended by ins_len. The new extref is written into the
 * last ins_len bytes of the item, which is correct for both a new item and an
 * extended one.
 *
 * NOTE(review): what happens when the name is already present, and the other
 * error paths, are on lines not visible in this excerpt.
 */
254 static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
255 struct btrfs_root *root,
256 const struct fscrypt_str *name,
257 u64 inode_objectid, u64 ref_objectid,
260 struct btrfs_inode_extref *extref;
/* Bytes the new entry needs: header plus name. */
262 int ins_len = name->len + sizeof(*extref);
264 struct btrfs_path *path;
265 struct btrfs_key key;
266 struct extent_buffer *leaf;
268 key.objectid = inode_objectid;
269 key.type = BTRFS_INODE_EXTREF_KEY;
270 key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);
272 path = btrfs_alloc_path();
276 ret = btrfs_insert_empty_item(trans, root, path, &key,
/* An item with this key already exists: look for the name, then grow the item. */
278 if (ret == -EEXIST) {
279 if (btrfs_find_name_in_ext_backref(path->nodes[0],
285 btrfs_extend_item(path, ins_len);
291 leaf = path->nodes[0];
/* Position ptr at the final ins_len bytes of the item. */
292 ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char);
293 ptr += btrfs_item_size(leaf, path->slots[0]) - ins_len;
294 extref = (struct btrfs_inode_extref *)ptr;
296 btrfs_set_inode_extref_name_len(path->nodes[0], extref, name->len);
297 btrfs_set_inode_extref_index(path->nodes[0], extref, index);
298 btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid);
/* Copy the name bytes into the name member and mark the leaf dirty. */
300 ptr = (unsigned long)&extref->name;
301 write_extent_buffer(path->nodes[0], name->name, ptr, name->len);
302 btrfs_mark_buffer_dirty(path->nodes[0]);
305 btrfs_free_path(path);
/*
 * Insert an inode ref for @name with directory index @index, keyed
 * (@inode_objectid, BTRFS_INODE_REF_KEY, @ref_objectid).
 *
 * If an item with that key already exists (-EEXIST from the insert), the item
 * is extended by ins_len and the new ref is appended at the old end of the
 * item. Otherwise the ref is written at the start of the freshly inserted
 * item. When the result is -EMLINK and the filesystem has the EXTENDED_IREF
 * incompat flag set, the ref is stored as an extended ref instead.
 *
 * NOTE(review): the duplicate-name checks, the origin of -EMLINK and the
 * remaining error paths are on lines not visible in this excerpt.
 */
309 /* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */
310 int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
311 struct btrfs_root *root, const struct fscrypt_str *name,
312 u64 inode_objectid, u64 ref_objectid, u64 index)
314 struct btrfs_fs_info *fs_info = root->fs_info;
315 struct btrfs_path *path;
316 struct btrfs_key key;
317 struct btrfs_inode_ref *ref;
320 int ins_len = name->len + sizeof(*ref);
322 key.objectid = inode_objectid;
323 key.offset = ref_objectid;
324 key.type = BTRFS_INODE_REF_KEY;
326 path = btrfs_alloc_path();
/* The path is still consulted below after btrfs_insert_empty_item() fails. */
330 path->skip_release_on_error = 1;
331 ret = btrfs_insert_empty_item(trans, root, path, &key,
333 if (ret == -EEXIST) {
335 ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
/*
 * Append case: record the current size, grow the item, then address the
 * new ref at item start + old_size, i.e. right after the existing refs.
 */
340 old_size = btrfs_item_size(path->nodes[0], path->slots[0]);
341 btrfs_extend_item(path, ins_len);
342 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
343 struct btrfs_inode_ref);
344 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
345 btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len);
346 btrfs_set_inode_ref_index(path->nodes[0], ref, index);
347 ptr = (unsigned long)(ref + 1);
349 } else if (ret < 0) {
350 if (ret == -EOVERFLOW) {
351 if (btrfs_find_name_in_backref(path->nodes[0],
/* Fresh item case: the ref header sits at the very start of the item. */
360 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
361 struct btrfs_inode_ref);
362 btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len);
363 btrfs_set_inode_ref_index(path->nodes[0], ref, index);
364 ptr = (unsigned long)(ref + 1);
/* Both cases leave ptr just past the ref header, where the name is written. */
366 write_extent_buffer(path->nodes[0], name->name, ptr, name->len);
367 btrfs_mark_buffer_dirty(path->nodes[0]);
370 btrfs_free_path(path);
/* Fall back to an extended ref only if the on-disk format allows it. */
372 if (ret == -EMLINK) {
373 struct btrfs_super_block *disk_super = fs_info->super_copy;
374 /* We ran out of space in the ref array. Need to
375 * add an extended ref. */
376 if (btrfs_super_incompat_flags(disk_super)
377 & BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
378 ret = btrfs_insert_inode_extref(trans, root, name,
380 ref_objectid, index);
/*
 * Insert an empty item sized for a struct btrfs_inode_item into @root, keyed
 * by @objectid with type BTRFS_INODE_ITEM_KEY. @path is supplied by the caller
 * and passed straight to btrfs_insert_empty_item().
 *
 * NOTE(review): the key.offset assignment and the return statement are on
 * lines not visible in this excerpt; presumably the result of
 * btrfs_insert_empty_item() is returned - confirm.
 */
386 int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
387 struct btrfs_root *root,
388 struct btrfs_path *path, u64 objectid)
390 struct btrfs_key key;
392 key.objectid = objectid;
393 key.type = BTRFS_INODE_ITEM_KEY;
396 ret = btrfs_insert_empty_item(trans, root, path, &key,
397 sizeof(struct btrfs_inode_item));
/*
 * Search @root for the key @location, leaving the result in @path.
 *
 * @mod selects the search mode: a negative value makes ins_len -1, anything
 * else makes it 0.
 *
 * Special case: when the search finds no exact match (ret > 0) for a
 * BTRFS_ROOT_ITEM_KEY whose offset is (u64)-1, the item in the preceding slot
 * is examined and compared on objectid and type only.
 *
 * NOTE(review): how cow is derived from @mod, what is done when the preceding
 * item matches, and the return statement are on lines not visible in this
 * excerpt.
 */
401 int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
402 *root, struct btrfs_path *path,
403 struct btrfs_key *location, int mod)
405 int ins_len = mod < 0 ? -1 : 0;
409 struct extent_buffer *leaf;
410 struct btrfs_key found_key;
412 ret = btrfs_search_slot(trans, root, location, path, ins_len, cow);
/* slots[0] != 0 guarantees that a preceding slot exists in this leaf. */
413 if (ret > 0 && location->type == BTRFS_ROOT_ITEM_KEY &&
414 location->offset == (u64)-1 && path->slots[0] != 0) {
415 slot = path->slots[0] - 1;
416 leaf = path->nodes[0];
417 btrfs_item_key_to_cpu(leaf, &found_key, slot);
418 if (found_key.objectid == location->objectid &&
419 found_key.type == location->type) {
/*
 * Emit the truncate tracepoint that fits the file extent item @fi:
 * trace_btrfs_truncate_show_fi_inline() for BTRFS_FILE_EXTENT_INLINE extents,
 * trace_btrfs_truncate_show_fi_regular() otherwise.
 *
 * NOTE(review): lines between the signature and the first test are not
 * visible in this excerpt; there may be an early return there - confirm.
 */
427 static inline void btrfs_trace_truncate(struct btrfs_inode *inode,
428 struct extent_buffer *leaf,
429 struct btrfs_file_extent_item *fi,
430 u64 offset, int extent_type, int slot)
434 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
435 trace_btrfs_truncate_show_fi_inline(inode, leaf, fi, slot,
438 trace_btrfs_truncate_show_fi_regular(inode, leaf, fi, offset);
442 * Remove inode items from a given root.
444 * @trans: A transaction handle.
445 * @root: The root from which to remove items.
446 * @inode: The inode whose items we want to remove.
447 * @control: The btrfs_truncate_control to control how and what we
450 * Remove all keys associated with the inode from the given root that have a key
451 * with a type greater than or equal to @min_type. When @min_type has a value of
452 * BTRFS_EXTENT_DATA_KEY, only remove file extent items that have an offset value
453 * greater than or equal to @new_size. If a file extent item that starts before
454 * @new_size and ends after it is found, its length is adjusted.
456 * Returns: 0 on success, < 0 on error and BTRFS_NEED_TRUNCATE_BLOCK when @min_type is
457 * BTRFS_EXTENT_DATA_KEY and the caller must truncate the last block.
/*
 * Implementation notes:
 *
 * The search key starts at offset (u64)-1 for inode control->ino and the path
 * uses READA_BACK, so items are visited from the highest key downwards. Items
 * chosen for removal are not deleted one by one: runs of adjacent slots are
 * batched in pending_del_slot / pending_del_nr and removed together with
 * btrfs_del_items().
 *
 * Fields of @control written here: last_size, sub_bytes, extents_found.
 *
 * NOTE(review): many lines of this function (labels, gotos, braces and several
 * assignments) are not visible in this excerpt; comments below describe only
 * what the visible lines establish.
 */
459 int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
460 struct btrfs_root *root,
461 struct btrfs_truncate_control *control)
463 struct btrfs_fs_info *fs_info = root->fs_info;
464 struct btrfs_path *path;
465 struct extent_buffer *leaf;
466 struct btrfs_file_extent_item *fi;
467 struct btrfs_key key;
468 struct btrfs_key found_key;
469 u64 new_size = control->new_size;
470 u64 extent_num_bytes = 0;
471 u64 extent_offset = 0;
473 u32 found_type = (u8)-1;
475 int pending_del_nr = 0;
476 int pending_del_slot = 0;
477 int extent_type = -1;
479 u64 bytes_deleted = 0;
480 bool be_nice = false;
/* Clearing the extent range needs an inode; a non-zero size implies data truncation. */
482 ASSERT(control->inode || !control->clear_extent_range);
483 ASSERT(new_size == 0 || control->min_type == BTRFS_EXTENT_DATA_KEY);
485 control->last_size = new_size;
486 control->sub_bytes = 0;
489 * For shareable roots we want to back off from time to time, this turns
490 * out to be subvolume roots, reloc roots, and data reloc roots.
/* be_nice gates the throttling checks further down; presumably set here - TODO confirm. */
492 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
495 path = btrfs_alloc_path();
498 path->reada = READA_BACK;
500 key.objectid = control->ino;
501 key.offset = (u64)-1;
506 * With a 16K leaf size and 128MiB extents, you can actually queue up a
507 * huge file in a single leaf. Most of the time that bytes_deleted is
508 * > 0, it will be huge by the time we get here
510 if (be_nice && bytes_deleted > SZ_32M &&
511 btrfs_should_end_transaction(trans)) {
516 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
522 /* There are no items in the tree for us to truncate, we're done */
523 if (path->slots[0] == 0)
529 u64 clear_start = 0, clear_len = 0, extent_start = 0;
530 bool refill_delayed_refs_rsv = false;
533 leaf = path->nodes[0];
534 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
535 found_type = found_key.type;
/* Two tests on the current key: a different inode, or a type below min_type. */
537 if (found_key.objectid != control->ino)
540 if (found_type < control->min_type)
/*
 * item_end starts at the key offset; for file extents the extent length is
 * read next (num_bytes for non-inline extents, ram_bytes for inline ones).
 * NOTE(review): the else-if condition below is the exact complement of the
 * preceding if, so it is always true when reached.
 */
543 item_end = found_key.offset;
544 if (found_type == BTRFS_EXTENT_DATA_KEY) {
545 fi = btrfs_item_ptr(leaf, path->slots[0],
546 struct btrfs_file_extent_item);
547 extent_type = btrfs_file_extent_type(leaf, fi);
548 if (extent_type != BTRFS_FILE_EXTENT_INLINE)
550 btrfs_file_extent_num_bytes(leaf, fi);
551 else if (extent_type == BTRFS_FILE_EXTENT_INLINE)
552 item_end += btrfs_file_extent_ram_bytes(leaf, fi);
554 btrfs_trace_truncate(control->inode, leaf, fi,
555 found_key.offset, extent_type,
559 if (found_type > control->min_type) {
562 if (item_end < new_size)
564 if (found_key.offset >= new_size)
570 /* FIXME, shrink the extent if the ref count is only 1 */
571 if (found_type != BTRFS_EXTENT_DATA_KEY)
574 control->extents_found++;
576 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
579 clear_start = found_key.offset;
580 extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
583 btrfs_file_extent_num_bytes(leaf, fi);
584 extent_num_bytes = ALIGN(new_size -
586 fs_info->sectorsize);
587 clear_start = ALIGN(new_size, fs_info->sectorsize);
589 btrfs_set_file_extent_num_bytes(leaf, fi,
/* Only extents with a non-zero disk bytenr contribute to sub_bytes. */
591 num_dec = (orig_num_bytes - extent_num_bytes);
592 if (extent_start != 0)
593 control->sub_bytes += num_dec;
594 btrfs_mark_buffer_dirty(leaf);
597 btrfs_file_extent_disk_num_bytes(leaf, fi);
598 extent_offset = found_key.offset -
599 btrfs_file_extent_offset(leaf, fi);
601 /* FIXME blocksize != 4096 */
602 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
603 if (extent_start != 0)
604 control->sub_bytes += num_dec;
607 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
609 * We can't truncate inline items that have had
613 btrfs_file_extent_encryption(leaf, fi) == 0 &&
614 btrfs_file_extent_other_encoding(leaf, fi) == 0 &&
615 btrfs_file_extent_compression(leaf, fi) == 0) {
616 u32 size = (u32)(new_size - found_key.offset);
618 btrfs_set_file_extent_ram_bytes(leaf, fi, size);
619 size = btrfs_file_extent_calc_inline_size(size);
620 btrfs_truncate_item(path, size, 1);
621 } else if (!del_item) {
623 * We have to bail so the last_size is set to
624 * just before this extent.
626 ret = BTRFS_NEED_TRUNCATE_BLOCK;
630 * Inline extents are special, we just treat
631 * them as a full sector worth in the file
632 * extent tree just for simplicity sake.
634 clear_len = fs_info->sectorsize;
637 control->sub_bytes += item_end + 1 - new_size;
641 * We only want to clear the file extent range if we're
642 * modifying the actual inode's mapping, which is just the
643 * normal truncate path.
645 if (control->clear_extent_range) {
646 ret = btrfs_inode_clear_file_extent_range(control->inode,
647 clear_start, clear_len);
649 btrfs_abort_transaction(trans, ret);
/* A pending batch, if any, must start at the slot directly after this one. */
655 ASSERT(!pending_del_nr ||
656 ((path->slots[0] + 1) == pending_del_slot));
658 control->last_size = found_key.offset;
659 if (!pending_del_nr) {
660 /* No pending yet, add ourselves */
661 pending_del_slot = path->slots[0];
663 } else if (path->slots[0] + 1 == pending_del_slot) {
664 /* Hop on the pending chunk */
666 pending_del_slot = path->slots[0];
669 control->last_size = new_size;
/*
 * A removed extent with a non-zero disk bytenr has its reference dropped
 * through btrfs_free_extent() using a BTRFS_DROP_DELAYED_REF, unless the
 * caller set skip_ref_updates.
 */
673 if (del_item && extent_start != 0 && !control->skip_ref_updates) {
674 struct btrfs_ref ref = { 0 };
676 bytes_deleted += extent_num_bytes;
678 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
679 extent_start, extent_num_bytes, 0);
680 btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
681 control->ino, extent_offset,
682 root->root_key.objectid, false);
683 ret = btrfs_free_extent(trans, &ref);
685 btrfs_abort_transaction(trans, ret);
688 if (be_nice && btrfs_check_space_for_delayed_refs(fs_info))
689 refill_delayed_refs_rsv = true;
692 if (found_type == BTRFS_INODE_ITEM_KEY)
/*
 * Flush the batch when the leaf start is reached, when the current slot is
 * not the head of the batch, or when the delayed refs reserve needs a refill.
 */
695 if (path->slots[0] == 0 ||
696 path->slots[0] != pending_del_slot ||
697 refill_delayed_refs_rsv) {
698 if (pending_del_nr) {
699 ret = btrfs_del_items(trans, root, path,
703 btrfs_abort_transaction(trans, ret);
708 btrfs_release_path(path);
711 * We can generate a lot of delayed refs, so we need to
712 * throttle every once and a while and make sure we're
713 * adding enough space to keep up with the work we are
714 * generating. Since we hold a transaction here we
715 * can't flush, and we don't want to FLUSH_LIMIT because
716 * we could have generated too many delayed refs to
717 * actually allocate, so just bail if we're short and
718 * let the normal reservation dance happen higher up.
720 if (refill_delayed_refs_rsv) {
721 ret = btrfs_delayed_refs_rsv_refill(fs_info,
722 BTRFS_RESERVE_NO_FLUSH);
/* Flush deletions still batched at this point, but only if no error occurred. */
734 if (ret >= 0 && pending_del_nr) {
737 err = btrfs_del_items(trans, root, path, pending_del_slot,
740 btrfs_abort_transaction(trans, err);
/* last_size may never be below new_size; on success it is clamped to new_size. */
745 ASSERT(control->last_size >= new_size);
746 if (!ret && control->last_size > new_size)
747 control->last_size = new_size;
749 btrfs_free_path(path);