#include <linux/kernel.h>
#include <linux/bio.h>
+ #include <linux/buffer_head.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
struct btrfs_dio_data {
u64 reserve;
- loff_t length;
- ssize_t submitted;
- struct extent_changeset *data_reserved;
+ u64 unsubmitted_oe_range_start;
+ u64 unsubmitted_oe_range_end;
+ int overwrite;
};
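The fields above travel from btrfs_direct_IO() down to the get_block callback through current->journal_info, because __blockdev_direct_IO() has no private-data argument. A condensed sketch of that handoff, assuming a hypothetical do_blockdev_dio() stand-in for the real call:

static ssize_t dio_data_handoff_sketch(struct kiocb *iocb,
				       struct iov_iter *iter)
{
	struct btrfs_dio_data dio_data = { 0 };
	ssize_t ret;

	current->journal_info = &dio_data;	/* stash for get_block */
	ret = do_blockdev_dio(iocb, iter);	/* hypothetical stand-in */
	current->journal_info = NULL;		/* always restore it */
	return ret;
}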
static const struct inode_operations btrfs_dir_inode_operations;
truncate_setsize(inode, newsize);
+ /* Disable non-locked read DIO to avoid an endless truncate */

+ btrfs_inode_block_unlocked_dio(BTRFS_I(inode));
inode_dio_wait(inode);
+ btrfs_inode_resume_unlocked_dio(BTRFS_I(inode));
ret = btrfs_truncate(inode, newsize == oldsize);
if (ret && inode->i_nlink) {
/*
* Keep looping until we have no more ranges in the io tree.
- * We can have ongoing bios started by readpages (called from readahead)
- * that have their endio callback (extent_io.c:end_bio_extent_readpage)
+ * We can have ongoing bios started by readahead that have
+ * their endio callback (extent_io.c:end_bio_extent_readpage)
* still in progress (they unlocked the pages in the bio but did not
* yet unlock the ranges in the io tree). This means some
* ranges can still be locked and eviction started because before
}
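The btrfs_inode_block_unlocked_dio()/btrfs_inode_resume_unlocked_dio() pair used in the truncate path above is not part of this hunk; in this era it is roughly the following pair of flag flips in btrfs_inode.h (a sketch from memory of the surrounding kernel, with barriers that pair against the BTRFS_INODE_READDIO_NEED_LOCK test in btrfs_direct_IO()):

static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode)
{
	set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
	/* ensure the flag is visible before inode_dio_wait() runs */
	smp_mb();
}

static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode)
{
	smp_mb__before_atomic();
	clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
}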
static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
- struct extent_state **cached_state, bool writing)
+ struct extent_state **cached_state, int writing)
{
struct btrfs_ordered_extent *ordered;
int ret = 0;
* for it to complete) and then invalidate the pages for
* this range (through invalidate_inode_pages2_range()),
* but that can lead us to a deadlock with a concurrent
- * call to readpages() (a buffered read or a defrag call
+ * call to readahead (a buffered read or a defrag call
* triggered a readahead) on a page lock due to an
* ordered dio extent we created before but for which we have
* not yet submitted a corresponding bio (hence it cannot
- * complete), which makes readpages() wait for that
+ * complete), which makes readahead wait for that
* ordered extent to complete while holding a lock on
* that page.
*/
}
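The body of lock_extent_direct() (elided here) is essentially a lock/check/wait loop; a condensed sketch, with the nowait and page-invalidation handling left out and signatures as in this kernel:

while (1) {
	lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			 cached_state);
	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart,
					     lockend - lockstart + 1);
	if (!ordered)
		break;
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     cached_state);
	/* wait for the ordered extent, then retry the lock */
	btrfs_start_ordered_extent(inode, ordered, 1);
	btrfs_put_ordered_extent(ordered);
}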
+ static int btrfs_get_blocks_direct_read(struct extent_map *em,
+ struct buffer_head *bh_result,
+ struct inode *inode,
+ u64 start, u64 len)
+ {
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+
+ if (em->block_start == EXTENT_MAP_HOLE ||
+ test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ return -ENOENT;
+
+ len = min(len, em->len - (start - em->start));
+
+ bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
+ inode->i_blkbits;
+ bh_result->b_size = len;
+ bh_result->b_bdev = fs_info->fs_devices->latest_bdev;
+ set_buffer_mapped(bh_result);
+
+ return 0;
+ }
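The b_blocknr conversion above is plain shift arithmetic; a standalone userspace illustration with made-up values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t block_start = 1048576;	/* em->block_start (bytes) */
	uint64_t em_start = 4096;	/* em->start (file offset) */
	uint64_t start = 12288;		/* I/O start (file offset) */
	unsigned int blkbits = 12;	/* 4 KiB blocks */

	/* (1048576 + (12288 - 4096)) >> 12 == 258 */
	printf("b_blocknr = %llu\n",
	       (unsigned long long)((block_start + (start - em_start)) >> blkbits));
	return 0;
}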
+
static int btrfs_get_blocks_direct_write(struct extent_map **map,
+ struct buffer_head *bh_result,
struct inode *inode,
struct btrfs_dio_data *dio_data,
u64 start, u64 len)
}
/* this will cow the extent */
+ len = bh_result->b_size;
free_extent_map(em);
*map = em = btrfs_new_extent_direct(inode, start, len);
if (IS_ERR(em)) {
len = min(len, em->len - (start - em->start));
skip_cow:
+ bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
+ inode->i_blkbits;
+ bh_result->b_size = len;
+ bh_result->b_bdev = fs_info->fs_devices->latest_bdev;
+ set_buffer_mapped(bh_result);
+
+ if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ set_buffer_new(bh_result);
+
/*
* Need to update the i_size under the extent lock so buffered
* readers will get the updated i_size when we unlock.
*/
- if (start + len > i_size_read(inode))
+ if (!dio_data->overwrite && start + len > i_size_read(inode))
i_size_write(inode, start + len);
+ WARN_ON(dio_data->reserve < len);
dio_data->reserve -= len;
+ dio_data->unsubmitted_oe_range_end = start + len;
+ current->journal_info = dio_data;
out:
return ret;
}
- static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
- loff_t length, unsigned flags, struct iomap *iomap,
- struct iomap *srcmap)
+ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_map *em;
struct extent_state *cached_state = NULL;
struct btrfs_dio_data *dio_data = NULL;
+ u64 start = iblock << inode->i_blkbits;
u64 lockstart, lockend;
- const bool write = !!(flags & IOMAP_WRITE);
+ u64 len = bh_result->b_size;
int ret = 0;
- u64 len = length;
- bool unlock_extents = false;
- if (!write)
+ if (!create)
len = min_t(u64, len, fs_info->sectorsize);
lockstart = start;
lockend = start + len - 1;
- /*
- * The generic stuff only does filemap_write_and_wait_range, which
- * isn't enough if we've written compressed pages to this area, so we
- * need to flush the dirty pages again to make absolutely sure that any
- * outstanding dirty pages are on disk.
- */
- if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags))
- ret = filemap_fdatawrite_range(inode->i_mapping, start,
- start + length - 1);
-
- dio_data = kzalloc(sizeof(*dio_data), GFP_NOFS);
- if (!dio_data)
- return -ENOMEM;
-
- dio_data->length = length;
- if (write) {
- dio_data->reserve = round_up(length, fs_info->sectorsize);
- ret = btrfs_delalloc_reserve_space(inode,
- &dio_data->data_reserved,
- start, dio_data->reserve);
- if (ret) {
- extent_changeset_free(dio_data->data_reserved);
- kfree(dio_data);
- return ret;
- }
+ if (current->journal_info) {
+ /*
+ * Need to pull our outstanding extents and set journal_info to
+ * NULL so that anything that needs to check if there's a
+ * transaction doesn't get confused.
+ */
+ dio_data = current->journal_info;
+ current->journal_info = NULL;
}
- iomap->private = dio_data;
-
/*
* If this errors out it's because we couldn't invalidate pagecache for
* this range and we need to fallback to buffered.
*/
- if (lock_extent_direct(inode, lockstart, lockend, &cached_state, write)) {
+ if (lock_extent_direct(inode, lockstart, lockend, &cached_state,
+ create)) {
ret = -ENOTBLK;
goto err;
}
goto unlock_err;
}
- len = min(len, em->len - (start - em->start));
- if (write) {
- ret = btrfs_get_blocks_direct_write(&em, inode, dio_data,
- start, len);
+ if (create) {
+ ret = btrfs_get_blocks_direct_write(&em, bh_result, inode,
+ dio_data, start, len);
if (ret < 0)
goto unlock_err;
- unlock_extents = true;
- /* Recalc len in case the new em is smaller than requested */
- len = min(len, em->len - (start - em->start));
+
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, &cached_state);
} else {
+ ret = btrfs_get_blocks_direct_read(em, bh_result, inode,
+ start, len);
+ /* Can be negative only if we read from a hole */
+ if (ret < 0) {
+ ret = 0;
+ free_extent_map(em);
+ goto unlock_err;
+ }
/*
* We need to unlock only the end area that we aren't using.
* The rest is going to be unlocked by the endio routine.
*/
- lockstart = start + len;
- if (lockstart < lockend)
- unlock_extents = true;
- }
-
- if (unlock_extents)
- unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- lockstart, lockend, &cached_state);
- else
- free_extent_state(cached_state);
-
- /*
- * Translate extent map information to iomap.
- * We trim the extents (and move the addr) even though iomap code does
- * that, since we have locked only the parts we are performing I/O in.
- */
- if ((em->block_start == EXTENT_MAP_HOLE) ||
- (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) && !write)) {
- iomap->addr = IOMAP_NULL_ADDR;
- iomap->type = IOMAP_HOLE;
- } else {
- iomap->addr = em->block_start + (start - em->start);
- iomap->type = IOMAP_MAPPED;
+ lockstart = start + bh_result->b_size;
+ if (lockstart < lockend) {
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+ lockstart, lockend, &cached_state);
+ } else {
+ free_extent_state(cached_state);
+ }
}
- iomap->offset = start;
- iomap->bdev = fs_info->fs_devices->latest_bdev;
- iomap->length = len;
free_extent_map(em);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
err:
- if (dio_data) {
- btrfs_delalloc_release_space(inode, dio_data->data_reserved,
- start, dio_data->reserve, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->reserve);
- extent_changeset_free(dio_data->data_reserved);
- kfree(dio_data);
- }
- return ret;
- }
-
- static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
- ssize_t written, unsigned flags, struct iomap *iomap)
- {
- int ret = 0;
- struct btrfs_dio_data *dio_data = iomap->private;
- size_t submitted = dio_data->submitted;
- const bool write = !!(flags & IOMAP_WRITE);
-
- if (!write && (iomap->type == IOMAP_HOLE)) {
- /* If reading from a hole, unlock and return */
- unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1);
- goto out;
- }
-
- if (submitted < length) {
- pos += submitted;
- length -= submitted;
- if (write)
- __endio_write_update_ordered(inode, pos, length, false);
- else
- unlock_extent(&BTRFS_I(inode)->io_tree, pos,
- pos + length - 1);
- ret = -ENOTBLK;
- }
-
- if (write) {
- if (dio_data->reserve)
- btrfs_delalloc_release_space(inode,
- dio_data->data_reserved, pos,
- dio_data->reserve, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->length);
- extent_changeset_free(dio_data->data_reserved);
- }
- out:
- kfree(dio_data);
- iomap->private = NULL;
-
+ if (dio_data)
+ current->journal_info = dio_data;
return ret;
}
dip->logical_offset + dip->bytes - 1);
}
- bio_endio(dip->dio_bio);
+ dio_end_io(dip->dio_bio);
kfree(dip);
}
dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
dip->dio_bio = dio_bio;
refcount_set(&dip->refs, 1);
+
+ if (write) {
+ struct btrfs_dio_data *dio_data = current->journal_info;
+
+ /*
+ * Setting range start and end to the same value means that
+ * no cleanup will happen in btrfs_direct_IO
+ */
+ dio_data->unsubmitted_oe_range_end = dip->logical_offset +
+ dip->bytes;
+ dio_data->unsubmitted_oe_range_start =
+ dio_data->unsubmitted_oe_range_end;
+ }
return dip;
}
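The two unsubmitted_oe_range fields form a half-open byte interval of ordered extents that were created but never got a bio; cleanup is needed exactly when the interval is non-empty. As a hypothetical predicate (the same comparison appears in btrfs_direct_IO()'s error path below):

static bool oe_cleanup_needed(const struct btrfs_dio_data *dio_data)
{
	return dio_data->unsubmitted_oe_range_start <
	       dio_data->unsubmitted_oe_range_end;
}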
- static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
- struct bio *dio_bio, loff_t file_offset)
+ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
+ loff_t file_offset)
{
const bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
int ret;
blk_status_t status;
struct btrfs_io_geometry geom;
- struct btrfs_dio_data *dio_data = iomap->private;
dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
if (!dip) {
file_offset + dio_bio->bi_iter.bi_size - 1);
}
dio_bio->bi_status = BLK_STS_RESOURCE;
- bio_endio(dio_bio);
- return BLK_QC_T_NONE;
+ dio_end_io(dio_bio);
+ return;
}
if (!write && csum) {
goto out_err;
}
- dio_data->submitted += clone_len;
clone_offset += clone_len;
start_sector += clone_len >> 9;
file_offset += clone_len;
} while (submit_len > 0);
- return BLK_QC_T_NONE;
+ return;
out_err:
dip->dio_bio->bi_status = status;
btrfs_dio_private_put(dip);
- return BLK_QC_T_NONE;
}
- const struct iomap_ops btrfs_dio_iomap_ops = {
- .iomap_begin = btrfs_dio_iomap_begin,
- .iomap_end = btrfs_dio_iomap_end,
- };
+ static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
+ const struct iov_iter *iter, loff_t offset)
+ {
+ int seg;
+ int i;
+ unsigned int blocksize_mask = fs_info->sectorsize - 1;
+ ssize_t retval = -EINVAL;
- const struct iomap_dio_ops btrfs_dops = {
- .submit_io = btrfs_submit_direct,
- };
+ if (offset & blocksize_mask)
+ goto out;
+
+ if (iov_iter_alignment(iter) & blocksize_mask)
+ goto out;
+
+ /* If this is a write we don't need to check anymore */
+ if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
+ return 0;
+ /*
+ * Check to make sure we don't have duplicate iov_base's in this
+ * iovec; if so, return -EINVAL, otherwise we'll get csum errors
+ * when reading back.
+ */
+ for (seg = 0; seg < iter->nr_segs; seg++) {
+ for (i = seg + 1; i < iter->nr_segs; i++) {
+ if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
+ goto out;
+ }
+ }
+ retval = 0;
+ out:
+ return retval;
+ }
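check_direct_IO() rejects anything that is not sector aligned; a standalone userspace illustration of the same offset/buffer/length rules (4096-byte sectors assumed):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

static int check_alignment(uint64_t offset, const void *buf, size_t len,
			   unsigned int sectorsize)
{
	unsigned int mask = sectorsize - 1;

	if (offset & mask)
		return -1;	/* misaligned file offset */
	if (((uintptr_t)buf | len) & mask)
		return -1;	/* misaligned buffer or length */
	return 0;
}

int main(void)
{
	static char buf[8192] __attribute__((aligned(4096)));

	printf("%d\n", check_alignment(4096, buf, 4096, 4096));	/* 0 */
	printf("%d\n", check_alignment(100, buf, 4096, 4096));	/* -1 */
	return 0;
}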
+
+ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+ {
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_dio_data dio_data = { 0 };
+ struct extent_changeset *data_reserved = NULL;
+ loff_t offset = iocb->ki_pos;
+ size_t count = 0;
+ int flags = 0;
+ bool wakeup = true;
+ bool relock = false;
+ ssize_t ret;
+
+ if (check_direct_IO(fs_info, iter, offset))
+ return 0;
+
+ inode_dio_begin(inode);
+
+ /*
+ * The generic stuff only does filemap_write_and_wait_range, which
+ * isn't enough if we've written compressed pages to this area, so
+ * we need to flush the dirty pages again to make absolutely sure
+ * that any outstanding dirty pages are on disk.
+ */
+ count = iov_iter_count(iter);
+ if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+ &BTRFS_I(inode)->runtime_flags))
+ filemap_fdatawrite_range(inode->i_mapping, offset,
+ offset + count - 1);
+
+ if (iov_iter_rw(iter) == WRITE) {
+ /*
+ * If the write DIO is beyond the EOF, we need to update
+ * the isize, but it is protected by i_mutex, so we
+ * cannot unlock the i_mutex in this case.
+ */
+ if (offset + count <= inode->i_size) {
+ dio_data.overwrite = 1;
+ inode_unlock(inode);
+ relock = true;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
+ offset, count);
+ if (ret)
+ goto out;
+
+ /*
+ * We need to know how many extents we reserved so that we can
+ * do the accounting properly if we go over the number we
+ * originally calculated. Abuse current->journal_info for this.
+ */
+ dio_data.reserve = round_up(count,
+ fs_info->sectorsize);
+ dio_data.unsubmitted_oe_range_start = (u64)offset;
+ dio_data.unsubmitted_oe_range_end = (u64)offset;
+ current->journal_info = &dio_data;
+ down_read(&BTRFS_I(inode)->dio_sem);
+ } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
+ &BTRFS_I(inode)->runtime_flags)) {
+ inode_dio_end(inode);
+ flags = DIO_LOCKING | DIO_SKIP_HOLES;
+ wakeup = false;
+ }
+
+ ret = __blockdev_direct_IO(iocb, inode,
+ fs_info->fs_devices->latest_bdev,
+ iter, btrfs_get_blocks_direct, NULL,
+ btrfs_submit_direct, flags);
+ if (iov_iter_rw(iter) == WRITE) {
+ up_read(&BTRFS_I(inode)->dio_sem);
+ current->journal_info = NULL;
+ if (ret < 0 && ret != -EIOCBQUEUED) {
+ if (dio_data.reserve)
+ btrfs_delalloc_release_space(inode, data_reserved,
+ offset, dio_data.reserve, true);
+ /*
+ * On error we might have left some ordered extents
+ * without submitting corresponding bios for them, so
+ * clean them up to avoid other tasks getting them
+ * and waiting for them to complete forever.
+ */
+ if (dio_data.unsubmitted_oe_range_start <
+ dio_data.unsubmitted_oe_range_end)
+ __endio_write_update_ordered(inode,
+ dio_data.unsubmitted_oe_range_start,
+ dio_data.unsubmitted_oe_range_end -
+ dio_data.unsubmitted_oe_range_start,
+ false);
+ } else if (ret >= 0 && (size_t)ret < count)
+ btrfs_delalloc_release_space(inode, data_reserved,
+ offset, count - (size_t)ret, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), count);
+ }
+ out:
+ if (wakeup)
+ inode_dio_end(inode);
+ if (relock)
+ inode_lock(inode);
+
+ extent_changeset_free(data_reserved);
+ return ret;
+ }
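From userspace, a direct read that satisfies those checks needs an aligned buffer, offset, and length; a minimal sketch (error handling trimmed):

#define _GNU_SOURCE	/* for O_DIRECT */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	void *buf;
	ssize_t n;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY | O_DIRECT);
	if (fd < 0 || posix_memalign(&buf, 4096, 4096))
		return 1;
	n = pread(fd, buf, 4096, 0);	/* aligned offset and length */
	printf("read %zd bytes\n", n);
	free(buf);
	close(fd);
	return 0;
}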
-#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
-
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
int ret;
- ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
+ ret = fiemap_prep(inode, fieinfo, start, &len, 0);
if (ret)
return ret;
return extent_writepages(mapping, wbc);
}
-static int
-btrfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void btrfs_readahead(struct readahead_control *rac)
{
- return extent_readpages(mapping, pages, nr_pages);
+ extent_readahead(rac);
}
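btrfs only wraps extent_readahead() here, but the readahead_control argument carries everything an implementation needs; a generic sketch (not btrfs's code) of inspecting it:

static void example_readahead(struct readahead_control *rac)
{
	pr_debug("readahead of %u pages at offset %lld\n",
		 readahead_count(rac), (long long)readahead_pos(rac));
	/*
	 * A real implementation pulls pages with readahead_page() and
	 * must unlock and put each page once its read completes.
	 */
}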
static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
int ret = try_release_extent_mapping(page, gfp_flags);
- if (ret == 1) {
- ClearPagePrivate(page);
- set_page_private(page, 0);
- put_page(page);
- }
+ if (ret == 1)
+ detach_page_private(page);
return ret;
}
if (ret != MIGRATEPAGE_SUCCESS)
return ret;
- if (page_has_private(page)) {
- ClearPagePrivate(page);
- get_page(newpage);
- set_page_private(newpage, page_private(page));
- set_page_private(page, 0);
- put_page(page);
- SetPagePrivate(newpage);
- }
+ if (page_has_private(page))
+ attach_page_private(newpage, detach_page_private(page));
if (PagePrivate2(page)) {
ClearPagePrivate2(page);
}
ClearPageChecked(page);
- if (PagePrivate(page)) {
- ClearPagePrivate(page);
- set_page_private(page, 0);
- put_page(page);
- }
+ detach_page_private(page);
}
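attach_page_private()/detach_page_private() replace the open-coded page-private dance removed above; they are roughly the following helpers (a sketch of the pagemap.h additions in this era), so the page reference taken for PagePrivate moves along with the data pointer:

static inline void attach_page_private(struct page *page, void *data)
{
	get_page(page);
	set_page_private(page, (unsigned long)data);
	SetPagePrivate(page);
}

static inline void *detach_page_private(struct page *page)
{
	void *data = (void *)page_private(page);

	if (!PagePrivate(page))
		return NULL;
	ClearPagePrivate(page);
	set_page_private(page, 0);
	put_page(page);

	return data;
}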
/*
.readpage = btrfs_readpage,
.writepage = btrfs_writepage,
.writepages = btrfs_writepages,
- .readpages = btrfs_readpages,
+ .readahead = btrfs_readahead,
- .direct_IO = noop_direct_IO,
+ .direct_IO = btrfs_direct_IO,
.invalidatepage = btrfs_invalidatepage,
.releasepage = btrfs_releasepage,
#ifdef CONFIG_MIGRATION
#include <linux/capability.h>
#include <linux/semaphore.h>
#include <linux/fcntl.h>
-#include <linux/fiemap.h>
#include <linux/rculist_bl.h>
#include <linux/atomic.h>
#include <linux/shrinker.h>
struct bdi_writeback;
struct bio;
struct export_operations;
+struct fiemap_extent_info;
struct hd_geometry;
struct iovec;
struct kiocb;
struct page;
struct address_space;
struct writeback_control;
+struct readahead_control;
/*
* Write life time hint values.
*/
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
+ void (*readahead)(struct readahead_control *);
int (*write_begin)(struct file *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
errseq_t f_wb_err;
+ errseq_t f_sb_err; /* for syncfs */
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
bool (*lm_break)(struct file_lock *);
int (*lm_change)(struct file_lock *, int, struct list_head *);
void (*lm_setup)(struct file_lock *, void **);
+ bool (*lm_breaker_owns_lease)(struct file_lock *);
};
struct lock_manager {
#define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020
#define SB_I_UNTRUSTED_MOUNTER 0x00000040
+#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */
+
/* Possible states of 'frozen' field */
enum {
SB_UNFROZEN = 0, /* FS is unfrozen */
/* Being remounted read-only */
int s_readonly_remount;
+ /* per-sb errseq_t for reporting writeback errors via syncfs */
+ errseq_t s_wb_err;
+
/* AIO completions deferred from interrupt context */
struct workqueue_struct *s_dio_done_wq;
struct hlist_head s_pins;
*
* Since page fault freeze protection behaves as a lock, users have to preserve
* ordering of freeze protection and other filesystem locks. It is advised to
- * put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault
+ * put sb_start_pagefault() close to mmap_lock in lock ordering. Page fault
* handling code implies lock dependency:
*
- * mmap_sem
+ * mmap_lock
* -> sb_start_pagefault
*/
static inline void sb_start_pagefault(struct super_block *sb)
extern int vfs_rmdir(struct inode *, struct dentry *);
extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
-extern int vfs_whiteout(struct inode *, struct dentry *);
+
+static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry)
+{
+ return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+}
extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
int open_flag);
extern void inode_init_owner(struct inode *inode, const struct inode *dir,
umode_t mode);
extern bool may_open_dev(const struct path *path);
-/*
- * VFS FS_IOC_FIEMAP helper definitions.
- */
-struct fiemap_extent_info {
- unsigned int fi_flags; /* Flags as passed from user */
- unsigned int fi_extents_mapped; /* Number of mapped extents */
- unsigned int fi_extents_max; /* Size of fiemap_extent array */
- struct fiemap_extent __user *fi_extents_start; /* Start of
- fiemap_extent array */
-};
-int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
- u64 phys, u64 len, u32 flags);
-int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
/*
* This is the "filldir" function type, used by readdir() to let
*
* I_CREATING New object's inode in the middle of setting up.
*
+ * I_DONTCACHE Evict inode as soon as it is not used anymore.
+ *
* Q: What is the difference between I_WILL_FREE and I_FREEING?
*/
#define I_DIRTY_SYNC (1 << 0)
#define I_WB_SWITCH (1 << 13)
#define I_OVL_INUSE (1 << 14)
#define I_CREATING (1 << 15)
+#define I_DONTCACHE (1 << 16)
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
#ifdef CONFIG_BLOCK
extern int register_blkdev(unsigned int, const char *);
extern void unregister_blkdev(unsigned int, const char *);
-extern void bdev_unhash_inode(dev_t dev);
extern struct block_device *bdget(dev_t);
extern struct block_device *bdgrab(struct block_device *bdev);
extern void bd_set_size(struct block_device *, loff_t size);
extern const struct file_operations def_blk_fops;
extern const struct file_operations def_chr_fops;
#ifdef CONFIG_BLOCK
-extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
extern int revalidate_disk(struct gendisk *);
extern int check_disk_change(struct block_device *);
extern int __invalidate_device(struct block_device *, bool);
-extern int invalidate_partition(struct gendisk *, int);
#endif
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);
return errseq_sample(&mapping->wb_err);
}
+/**
+ * file_sample_sb_err - sample the current errseq_t to test for later errors
+ * @file: file pointer of interest
+ *
+ * Grab the most current superblock-level errseq_t value for the given
+ * struct file.
+ */
+static inline errseq_t file_sample_sb_err(struct file *file)
+{
+ return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
+}
+
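f_sb_err enables the errseq pattern for syncfs: sample the superblock's s_wb_err at open, then check-and-advance at syncfs time so every opener observes a writeback error exactly once. A condensed sketch of the consuming side, assuming a hypothetical helper name:

static int example_sb_err_check(struct file *file)
{
	struct super_block *sb = file->f_path.dentry->d_sb;

	/* returns a negative errno once per sample point, else 0 */
	return errseq_check_and_advance(&sb->s_wb_err, &file->f_sb_err);
}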
static inline int filemap_nr_thps(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
extern int generic_delete_inode(struct inode *inode);
static inline int generic_drop_inode(struct inode *inode)
{
- return !inode->i_nlink || inode_unhashed(inode);
+ return !inode->i_nlink || inode_unhashed(inode) ||
+ (inode->i_state & I_DONTCACHE);
}
+extern void d_mark_dontcache(struct inode *inode);
extern struct inode *ilookup5_nowait(struct super_block *sb,
unsigned long hashval, int (*test)(struct inode *, void *),
int (*match)(struct inode *,
unsigned long, void *),
void *data);
+extern struct inode *find_inode_rcu(struct super_block *, unsigned long,
+ int (*)(struct inode *, void *), void *);
+extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long);
extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
extern int insert_inode_locked(struct inode *);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
DIO_SKIP_HOLES = 0x02,
};
+ void dio_end_io(struct bio *bio);
+
ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, struct iov_iter *iter,
get_block_t get_block,
extern const char *vfs_get_link(struct dentry *, struct delayed_call *);
extern int vfs_readlink(struct dentry *, char __user *, int);
-extern int __generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo,
- loff_t start, loff_t len,
- get_block_t *get_block);
-extern int generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block);
-
extern struct file_system_type *get_filesystem(struct file_system_type *fs);
extern void put_filesystem(struct file_system_type *fs);
extern struct file_system_type *get_fs_type(const char *name);
extern int file_update_time(struct file *file);
-static inline bool io_is_direct(struct file *filp)
-{
- return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
-}
-
static inline bool vma_is_dax(const struct vm_area_struct *vma)
{
return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
int res = 0;
if (file->f_flags & O_APPEND)
res |= IOCB_APPEND;
- if (io_is_direct(file))
+ if (file->f_flags & O_DIRECT)
res |= IOCB_DIRECT;
if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))
res |= IOCB_DSYNC;
struct ctl_table;
int proc_nr_files(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos);
+ void *buffer, size_t *lenp, loff_t *ppos);
int proc_nr_dentry(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos);
+ void *buffer, size_t *lenp, loff_t *ppos);
int proc_nr_inodes(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos);
+ void *buffer, size_t *lenp, loff_t *ppos);
int __init get_filesystem_list(char *buf);
#define __FMODE_EXEC ((__force int) FMODE_EXEC)