return rc;
}
-/* copied from fs/locks.c with a name change */
-#define cifs_for_each_lock(inode, lockp) \
- for (lockp = &inode->i_flock; *lockp != NULL; \
- lockp = &(*lockp)->fl_next)
-
struct lock_to_push {
struct list_head llist;
__u64 offset;
{
struct inode *inode = cfile->dentry->d_inode;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
- struct file_lock *flock, **before;
- unsigned int count = 0, i = 0;
+ struct file_lock *flock;
+ struct file_lock_context *flctx = inode->i_flctx;
+ unsigned int i;
int rc = 0, xid, type;
struct list_head locks_to_send, *el;
struct lock_to_push *lck, *tmp;
xid = get_xid();
- spin_lock(&inode->i_lock);
- cifs_for_each_lock(inode, before) {
- if ((*before)->fl_flags & FL_POSIX)
- count++;
- }
- spin_unlock(&inode->i_lock);
+ if (!flctx)
+ goto out;
INIT_LIST_HEAD(&locks_to_send);
/*
- * Allocating count locks is enough because no FL_POSIX locks can be
- * added to the list while we are holding cinode->lock_sem that
+ * Allocating flc_posix_cnt locks is enough because no FL_POSIX locks
+ * can be added to the list while we are holding cinode->lock_sem that
* protects locking operations of this inode.
*/
- for (; i < count; i++) {
+ for (i = 0; i < flctx->flc_posix_cnt; i++) {
lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
if (!lck) {
rc = -ENOMEM;
}
el = locks_to_send.next;
- spin_lock(&inode->i_lock);
- cifs_for_each_lock(inode, before) {
- flock = *before;
- if ((flock->fl_flags & FL_POSIX) == 0)
- continue;
+ spin_lock(&flctx->flc_lock);
+ list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
if (el == &locks_to_send) {
/*
* The list ended. We don't have enough allocated
lck->length = length;
lck->type = type;
lck->offset = flock->fl_start;
- el = el->next;
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&flctx->flc_lock);
list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
int stored_rc;
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = cifs_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
#ifdef CONFIG_FSNOTIFY
inode->i_fsnotify_mask = 0;
#endif
-
+ inode->i_flctx = NULL;
this_cpu_inc(nr_inodes);
return 0;
BUG_ON(inode_has_buffers(inode));
security_inode_free(inode);
fsnotify_inode_delete(inode);
+ locks_free_lock_context(inode->i_flctx);
if (!inode->i_nlink) {
WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
atomic_long_dec(&inode->i_sb->s_remove_count);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
mapping->i_mmap = RB_ROOT;
- INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
}
EXPORT_SYMBOL(address_space_init_once);
ol_dqblk_block_off(sb, c, off);
}
- /* Compute block number from given offset */
- static inline unsigned int ol_dqblk_file_block(struct super_block *sb, loff_t off)
- {
- return off >> sb->s_blocksize_bits;
- }
-
static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off)
{
return off & ((1 << sb->s_blocksize_bits) - 1);
ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
OCFS2_LOCAL_INFO_OFF);
spin_lock(&dq_data_lock);
- ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
+ ldinfo->dqi_flags = cpu_to_le32(oinfo->dqi_flags);
ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks);
ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks);
spin_unlock(&dq_data_lock);
/* We don't need the lock and we have to acquire quota file locks
* which will later depend on this lock */
mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
- info->dqi_maxblimit = 0x7fffffffffffffffLL;
- info->dqi_maxilimit = 0x7fffffffffffffffLL;
+ info->dqi_max_spc_limit = 0x7fffffffffffffffLL;
+ info->dqi_max_ino_limit = 0x7fffffffffffffffLL;
oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS);
if (!oinfo) {
mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota"
}
ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
OCFS2_LOCAL_INFO_OFF);
- info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags);
+ oinfo->dqi_flags = le32_to_cpu(ldinfo->dqi_flags);
oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks);
oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks);
oinfo->dqi_libh = bh;
/* We crashed when using local quota file? */
- if (!(info->dqi_flags & OLQF_CLEAN)) {
+ if (!(oinfo->dqi_flags & OLQF_CLEAN)) {
rec = OCFS2_SB(sb)->quota_rec;
if (!rec) {
rec = ocfs2_alloc_quota_recovery();
}
/* Now mark quota file as used */
- info->dqi_flags &= ~OLQF_CLEAN;
+ oinfo->dqi_flags &= ~OLQF_CLEAN;
status = ocfs2_modify_bh(lqinode, bh, olq_update_info, info);
if (status < 0) {
mlog_errno(status);
goto out;
/* Mark local file as clean */
- info->dqi_flags |= OLQF_CLEAN;
+ oinfo->dqi_flags |= OLQF_CLEAN;
status = ocfs2_modify_bh(sb_dqopt(sb)->files[type],
oinfo->dqi_libh,
olq_update_info,
Opt_coherency_full,
Opt_resv_level,
Opt_dir_resv_level,
+ Opt_journal_async_commit,
Opt_err,
};
{Opt_coherency_full, "coherency=full"},
{Opt_resv_level, "resv_level=%u"},
{Opt_dir_resv_level, "dir_resv_level=%u"},
+ {Opt_journal_async_commit, "journal_async_commit"},
{Opt_err, NULL}
};
}
}
-/* Handle quota on quotactl */
-static int ocfs2_quota_on(struct super_block *sb, int type, int format_id)
-{
- unsigned int feature[OCFS2_MAXQUOTAS] = {
- OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
- OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
-
- if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
- return -EINVAL;
-
- return dquot_enable(sb_dqopt(sb)->files[type], type,
- format_id, DQUOT_LIMITS_ENABLED);
-}
-
-/* Handle quota off quotactl */
-static int ocfs2_quota_off(struct super_block *sb, int type)
-{
- return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
-}
-
-static const struct quotactl_ops ocfs2_quotactl_ops = {
- .quota_on_meta = ocfs2_quota_on,
- .quota_off = ocfs2_quota_off,
- .quota_sync = dquot_quota_sync,
- .get_info = dquot_get_dqinfo,
- .set_info = dquot_set_dqinfo,
- .get_dqblk = dquot_get_dqblk,
- .set_dqblk = dquot_set_dqblk,
-};
-
static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
{
struct dentry *root;
option < OCFS2_MAX_RESV_LEVEL)
mopt->dir_resv_level = option;
break;
+ case Opt_journal_async_commit:
+ mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
+ break;
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
if (osb->osb_dir_resv_level != osb->osb_resv_level)
seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level);
+ if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
+ seq_printf(s, ",journal_async_commit");
+
return 0;
}
sb->s_op = &ocfs2_sops;
sb->s_d_op = &ocfs2_dentry_ops;
sb->s_export_op = &ocfs2_export_ops;
- sb->s_qcop = &ocfs2_quotactl_ops;
+ sb->s_qcop = &dquot_quotactl_sysfile_ops;
sb->dq_op = &ocfs2_quota_operations;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
sb->s_xattr = ocfs2_xattr_handlers;
goto finally;
}
+ if (osb->s_mount_opt & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
+ jbd2_journal_set_features(osb->journal->j_journal,
+ JBD2_FEATURE_COMPAT_CHECKSUM, 0,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
+ else
+ jbd2_journal_clear_features(osb->journal->j_journal,
+ JBD2_FEATURE_COMPAT_CHECKSUM, 0,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
+
if (dirty) {
/* recover my local alloc if we didn't unmount cleanly. */
status = ocfs2_begin_local_alloc_recovery(osb,
return (-status);
}
+int
+xfs_update_prealloc_flags(
+ struct xfs_inode *ip,
+ enum xfs_prealloc_flags flags)
+{
+ struct xfs_trans *tp;
+ int error;
+
+ tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
+ error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return error;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+ if (!(flags & XFS_PREALLOC_INVISIBLE)) {
+ ip->i_d.di_mode &= ~S_ISUID;
+ if (ip->i_d.di_mode & S_IXGRP)
+ ip->i_d.di_mode &= ~S_ISGID;
+ xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ }
+
+ if (flags & XFS_PREALLOC_SET)
+ ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
+ if (flags & XFS_PREALLOC_CLEAR)
+ ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ if (flags & XFS_PREALLOC_SYNC)
+ xfs_trans_set_sync(tp);
+ return xfs_trans_commit(tp, 0);
+}
+
/*
* Fsync operations on directories are much simpler than on regular files,
* as there is no file data to flush, and thus also no need for explicit
{
struct inode *inode = file_inode(file);
struct xfs_inode *ip = XFS_I(inode);
- struct xfs_trans *tp;
long error;
+ enum xfs_prealloc_flags flags = 0;
loff_t new_size = 0;
if (!S_ISREG(inode->i_mode))
if (error)
goto out_unlock;
} else {
+ flags |= XFS_PREALLOC_SET;
+
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode)) {
new_size = offset + len;
goto out_unlock;
}
- tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
- error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- goto out_unlock;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- ip->i_d.di_mode &= ~S_ISUID;
- if (ip->i_d.di_mode & S_IXGRP)
- ip->i_d.di_mode &= ~S_ISGID;
-
- if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
- ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
-
- xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
if (file->f_flags & O_DSYNC)
- xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
+ flags |= XFS_PREALLOC_SYNC;
+
+ error = xfs_update_prealloc_flags(ip, flags);
if (error)
goto out_unlock;
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = xfs_vm_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
spinlock_t tree_lock; /* and lock protecting it */
atomic_t i_mmap_writable;/* count VM_SHARED mappings */
struct rb_root i_mmap; /* tree of private and shared mappings */
- struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */
unsigned long nrpages; /* number of total pages */
*/
static inline int mapping_mapped(struct address_space *mapping)
{
- return !RB_EMPTY_ROOT(&mapping->i_mmap) ||
- !list_empty(&mapping->i_mmap_nonlinear);
+ return !RB_EMPTY_ROOT(&mapping->i_mmap);
}
/*
atomic_t i_readcount; /* struct files open RO */
#endif
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
- struct file_lock *i_flock;
+ struct file_lock_context *i_flctx;
struct address_space i_data;
struct list_head i_devices;
union {
/* legacy typedef, should eventually be removed */
typedef void *fl_owner_t;
+struct file_lock;
+
struct file_lock_operations {
void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
void (*fl_release_private)(struct file_lock *);
void (*lm_notify)(struct file_lock *); /* unblock callback */
int (*lm_grant)(struct file_lock *, int);
bool (*lm_break)(struct file_lock *);
- int (*lm_change)(struct file_lock **, int, struct list_head *);
+ int (*lm_change)(struct file_lock *, int, struct list_head *);
void (*lm_setup)(struct file_lock *, void **);
};
* FIXME: should we create a separate "struct lock_request" to help distinguish
* these two uses?
*
- * The i_flock list is ordered by:
+ * The varous i_flctx lists are ordered by:
*
- * 1) lock type -- FL_LEASEs first, then FL_FLOCK, and finally FL_POSIX
- * 2) lock owner
- * 3) lock range start
- * 4) lock range end
+ * 1) lock owner
+ * 2) lock range start
+ * 3) lock range end
*
* Obviously, the last two criteria only matter for POSIX locks.
*/
struct file_lock {
struct file_lock *fl_next; /* singly linked list for this inode */
+ struct list_head fl_list; /* link into file_lock_context */
struct hlist_node fl_link; /* node in global lists */
struct list_head fl_block; /* circular list of blocked processes */
fl_owner_t fl_owner;
} fl_u;
};
+struct file_lock_context {
+ spinlock_t flc_lock;
+ struct list_head flc_flock;
+ struct list_head flc_posix;
+ struct list_head flc_lease;
+ int flc_flock_cnt;
+ int flc_posix_cnt;
+ int flc_lease_cnt;
+};
+
/* The following constant reflects the upper bound of the file/locking space */
#ifndef OFFSET_MAX
#define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1)))
extern int fcntl_getlease(struct file *filp);
/* fs/locks.c */
+void locks_free_lock_context(struct file_lock_context *ctx);
void locks_free_lock(struct file_lock *fl);
extern void locks_init_lock(struct file_lock *);
extern struct file_lock * locks_alloc_lock(void);
extern void lease_get_mtime(struct inode *, struct timespec *time);
extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
-extern int lease_modify(struct file_lock **, int, struct list_head *);
+extern int lease_modify(struct file_lock *, int, struct list_head *);
#else /* !CONFIG_FILE_LOCKING */
static inline int fcntl_getlk(struct file *file, unsigned int cmd,
struct flock __user *user)
return F_UNLCK;
}
+static inline void
+locks_free_lock_context(struct file_lock_context *ctx)
+{
+}
+
static inline void locks_init_lock(struct file_lock *fl)
{
return;
return -EINVAL;
}
-static inline int lease_modify(struct file_lock **before, int arg,
+static inline int lease_modify(struct file_lock *fl, int arg,
struct list_head *dispose)
{
return -EINVAL;
struct file *filp,
loff_t size)
{
- if (inode->i_flock && mandatory_lock(inode))
+ if (inode->i_flctx && mandatory_lock(inode))
return locks_mandatory_area(
FLOCK_VERIFY_WRITE, inode, filp,
size < inode->i_size ? size : inode->i_size,
{
/*
* Since this check is lockless, we must ensure that any refcounts
- * taken are done before checking inode->i_flock. Otherwise, we could
- * end up racing with tasks trying to set a new lease on this file.
+ * taken are done before checking i_flctx->flc_lease. Otherwise, we
+ * could end up racing with tasks trying to set a new lease on this
+ * file.
*/
smp_mb();
- if (inode->i_flock)
+ if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
return __break_lease(inode, mode, FL_LEASE);
return 0;
}
{
/*
* Since this check is lockless, we must ensure that any refcounts
- * taken are done before checking inode->i_flock. Otherwise, we could
- * end up racing with tasks trying to set a new lease on this file.
+ * taken are done before checking i_flctx->flc_lease. Otherwise, we
+ * could end up racing with tasks trying to set a new lease on this
+ * file.
*/
smp_mb();
- if (inode->i_flock)
+ if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease))
return __break_lease(inode, mode, FL_DELEG);
return 0;
}
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
- extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
- unsigned long size, pgoff_t pgoff);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
- #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
#define VM_ARCH_2 0x02000000
#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */
extern pgprot_t protection_map[16];
#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */
- #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */
- #define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */
- #define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */
- #define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */
- #define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
- #define FAULT_FLAG_TRIED 0x40 /* second try */
- #define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */
+ #define FAULT_FLAG_MKWRITE 0x02 /* Fault was mkwrite of existing pte */
+ #define FAULT_FLAG_ALLOW_RETRY 0x04 /* Retry fault if blocking */
+ #define FAULT_FLAG_RETRY_NOWAIT 0x08 /* Don't drop mmap_sem and wait when retrying */
+ #define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */
+ #define FAULT_FLAG_TRIED 0x20 /* Second try */
+ #define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */
/*
* vm_fault is filled by the the pagefault handler and passed to the vma's
* ->fault function. The vma's ->fault is responsible for returning a bitmask
* of VM_FAULT_xxx flags that give details about how the fault was handled.
*
- * pgoff should be used in favour of virtual_address, if possible. If pgoff
- * is used, one may implement ->remap_pages to get nonlinear mapping support.
+ * pgoff should be used in favour of virtual_address, if possible.
*/
struct vm_fault {
unsigned int flags; /* FAULT_FLAG_xxx flags */
struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
unsigned long addr);
#endif
- /* called by sys_remap_file_pages() to populate non-linear mapping */
- int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
- unsigned long size, pgoff_t pgoff);
-
+ /*
+ * Called by vm_normal_page() for special PTEs to find the
+ * page for @addr. This is useful if the default behavior
+ * (using pte_page()) would not find the correct page.
+ */
+ struct page *(*find_special_page)(struct vm_area_struct *vma,
+ unsigned long addr);
};
struct mmu_gather;
return tail;
}
+ /*
+ * Since either compound page could be dismantled asynchronously in THP
+ * or we access asynchronously arbitrary positioned struct page, there
+ * would be tail flag race. To handle this race, we should call
+ * smp_rmb() before checking tail flag. compound_head_by_tail() did it.
+ */
static inline struct page *compound_head(struct page *page)
{
if (unlikely(PageTail(page)))
}
/*
+ * If we access compound page synchronously such as access to
+ * allocated page, there is no need to handle tail flag race, so we can
+ * check tail flag directly without any synchronization primitive.
+ */
+ static inline struct page *compound_head_fast(struct page *page)
+ {
+ if (unlikely(PageTail(page)))
+ return page->first_page;
+ return page;
+ }
+
+ /*
* The atomic page->_mapcount, starts from -1: so that transitions
* both from it and to it can be tracked, using atomic_inc_and_test
* and atomic_add_negative(-1).
static inline struct page *virt_to_head_page(const void *x)
{
struct page *page = virt_to_page(x);
- return compound_head(page);
+
+ /*
+ * We don't need to worry about synchronization of tail flag
+ * when we call virt_to_head_page() since it is only called for
+ * already allocated page and this page won't be freed until
+ * this virt_to_head_page() is finished. So use _fast variant.
+ */
+ return compound_head_fast(page);
}
/*
* Parameter block passed down to zap_pte_range in exceptional cases.
*/
struct zap_details {
- struct vm_area_struct *nonlinear_vma; /* Check page->index if set */
struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
for (vma = vma_interval_tree_iter_first(root, start, last); \
vma; vma = vma_interval_tree_iter_next(vma, start, last))
- static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
- struct list_head *list)
- {
- list_add_tail(&vma->shared.nonlinear, list);
- }
-
void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
struct rb_root *root);
void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
if (HAVE_PTE_SPECIAL) {
if (likely(!pte_special(pte)))
goto check_pfn;
+ if (vma->vm_ops && vma->vm_ops->find_special_page)
+ return vma->vm_ops->find_special_page(vma, addr);
if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
return NULL;
if (!is_zero_pfn(pfn))
/* pte contains position in swap or file, so copy. */
if (unlikely(!pte_present(pte))) {
- if (!pte_file(pte)) {
- swp_entry_t entry = pte_to_swp_entry(pte);
-
- if (likely(!non_swap_entry(entry))) {
- if (swap_duplicate(entry) < 0)
- return entry.val;
-
- /* make sure dst_mm is on swapoff's mmlist. */
- if (unlikely(list_empty(&dst_mm->mmlist))) {
- spin_lock(&mmlist_lock);
- if (list_empty(&dst_mm->mmlist))
- list_add(&dst_mm->mmlist,
- &src_mm->mmlist);
- spin_unlock(&mmlist_lock);
- }
- rss[MM_SWAPENTS]++;
- } else if (is_migration_entry(entry)) {
- page = migration_entry_to_page(entry);
-
- if (PageAnon(page))
- rss[MM_ANONPAGES]++;
- else
- rss[MM_FILEPAGES]++;
-
- if (is_write_migration_entry(entry) &&
- is_cow_mapping(vm_flags)) {
- /*
- * COW mappings require pages in both
- * parent and child to be set to read.
- */
- make_migration_entry_read(&entry);
- pte = swp_entry_to_pte(entry);
- if (pte_swp_soft_dirty(*src_pte))
- pte = pte_swp_mksoft_dirty(pte);
- set_pte_at(src_mm, addr, src_pte, pte);
- }
+ swp_entry_t entry = pte_to_swp_entry(pte);
+
+ if (likely(!non_swap_entry(entry))) {
+ if (swap_duplicate(entry) < 0)
+ return entry.val;
+
+ /* make sure dst_mm is on swapoff's mmlist. */
+ if (unlikely(list_empty(&dst_mm->mmlist))) {
+ spin_lock(&mmlist_lock);
+ if (list_empty(&dst_mm->mmlist))
+ list_add(&dst_mm->mmlist,
+ &src_mm->mmlist);
+ spin_unlock(&mmlist_lock);
+ }
+ rss[MM_SWAPENTS]++;
+ } else if (is_migration_entry(entry)) {
+ page = migration_entry_to_page(entry);
+
+ if (PageAnon(page))
+ rss[MM_ANONPAGES]++;
+ else
+ rss[MM_FILEPAGES]++;
+
+ if (is_write_migration_entry(entry) &&
+ is_cow_mapping(vm_flags)) {
+ /*
+ * COW mappings require pages in both
+ * parent and child to be set to read.
+ */
+ make_migration_entry_read(&entry);
+ pte = swp_entry_to_pte(entry);
+ if (pte_swp_soft_dirty(*src_pte))
+ pte = pte_swp_mksoft_dirty(pte);
+ set_pte_at(src_mm, addr, src_pte, pte);
}
}
goto out_set_pte;
* readonly mappings. The tradeoff is that copy_page_range is more
* efficient than faulting.
*/
- if (!(vma->vm_flags & (VM_HUGETLB | VM_NONLINEAR |
- VM_PFNMAP | VM_MIXEDMAP))) {
- if (!vma->anon_vma)
- return 0;
- }
+ if (!(vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) &&
+ !vma->anon_vma)
+ return 0;
if (is_vm_hugetlb_page(vma))
return copy_hugetlb_page_range(dst_mm, src_mm, vma);
spinlock_t *ptl;
pte_t *start_pte;
pte_t *pte;
+ swp_entry_t entry;
again:
init_rss_vec(rss);
if (details->check_mapping &&
details->check_mapping != page->mapping)
continue;
- /*
- * Each page->index must be checked when
- * invalidating or truncating nonlinear.
- */
- if (details->nonlinear_vma &&
- (page->index < details->first_index ||
- page->index > details->last_index))
- continue;
}
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
if (unlikely(!page))
continue;
- if (unlikely(details) && details->nonlinear_vma
- && linear_page_index(details->nonlinear_vma,
- addr) != page->index) {
- pte_t ptfile = pgoff_to_pte(page->index);
- if (pte_soft_dirty(ptent))
- ptfile = pte_file_mksoft_dirty(ptfile);
- set_pte_at(mm, addr, pte, ptfile);
- }
if (PageAnon(page))
rss[MM_ANONPAGES]--;
else {
}
continue;
}
- /*
- * If details->check_mapping, we leave swap entries;
- * if details->nonlinear_vma, we leave file entries.
- */
+ /* If details->check_mapping, we leave swap entries. */
if (unlikely(details))
continue;
- if (pte_file(ptent)) {
- if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
- print_bad_pte(vma, addr, ptent, NULL);
- } else {
- swp_entry_t entry = pte_to_swp_entry(ptent);
- if (!non_swap_entry(entry))
- rss[MM_SWAPENTS]--;
- else if (is_migration_entry(entry)) {
- struct page *page;
+ entry = pte_to_swp_entry(ptent);
+ if (!non_swap_entry(entry))
+ rss[MM_SWAPENTS]--;
+ else if (is_migration_entry(entry)) {
+ struct page *page;
- page = migration_entry_to_page(entry);
+ page = migration_entry_to_page(entry);
- if (PageAnon(page))
- rss[MM_ANONPAGES]--;
- else
- rss[MM_FILEPAGES]--;
- }
- if (unlikely(!free_swap_and_cache(entry)))
- print_bad_pte(vma, addr, ptent, NULL);
+ if (PageAnon(page))
+ rss[MM_ANONPAGES]--;
+ else
+ rss[MM_FILEPAGES]--;
}
+ if (unlikely(!free_swap_and_cache(entry)))
+ print_bad_pte(vma, addr, ptent, NULL);
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
} while (pte++, addr += PAGE_SIZE, addr != end);
pgd_t *pgd;
unsigned long next;
- if (details && !details->check_mapping && !details->nonlinear_vma)
+ if (details && !details->check_mapping)
details = NULL;
BUG_ON(addr >= end);
* @vma: vm_area_struct holding the applicable pages
* @start: starting address of pages to zap
* @size: number of bytes to zap
- * @details: details of nonlinear truncation or shared cache invalidation
+ * @details: details of shared cache invalidation
*
* Caller must protect the VMA list
*/
* @vma: vm_area_struct holding the applicable pages
* @address: starting address of pages to zap
* @size: number of bytes to zap
- * @details: details of nonlinear truncation or shared cache invalidation
+ * @details: details of shared cache invalidation
*
* The range must fit into one VMA.
*/
EXPORT_SYMBOL_GPL(apply_to_page_range);
/*
- * handle_pte_fault chooses page fault handler according to an entry
- * which was read non-atomically. Before making any commitment, on
- * those architectures or configurations (e.g. i386 with PAE) which
- * might give a mix of unmatched parts, do_swap_page and do_nonlinear_fault
- * must check under lock before unmapping the pte and proceeding
- * (but do_wp_page is only called after already making such a check;
+ * handle_pte_fault chooses page fault handler according to an entry which was
+ * read non-atomically. Before making any commitment, on those architectures
+ * or configurations (e.g. i386 with PAE) which might give a mix of unmatched
+ * parts, do_swap_page must check under lock before unmapping the pte and
+ * proceeding (but do_wp_page is only called after already making such a check;
* and do_anonymous_page can safely check later on).
*/
static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
pte_t entry;
int ret = 0;
int page_mkwrite = 0;
- struct page *dirty_page = NULL;
+ bool dirty_shared = false;
unsigned long mmun_start = 0; /* For mmu_notifiers */
unsigned long mmun_end = 0; /* For mmu_notifiers */
struct mem_cgroup *memcg;
unlock_page(old_page);
} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
(VM_WRITE|VM_SHARED))) {
+ page_cache_get(old_page);
/*
* Only catch write-faults on shared writable pages,
* read-only shared pages can get COWed by
*/
if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
int tmp;
- page_cache_get(old_page);
+
pte_unmap_unlock(page_table, ptl);
tmp = do_page_mkwrite(vma, old_page, address);
if (unlikely(!tmp || (tmp &
unlock_page(old_page);
goto unlock;
}
-
page_mkwrite = 1;
}
- dirty_page = old_page;
- get_page(dirty_page);
+
+ dirty_shared = true;
reuse:
/*
pte_unmap_unlock(page_table, ptl);
ret |= VM_FAULT_WRITE;
- if (!dirty_page)
- return ret;
-
- if (!page_mkwrite) {
+ if (dirty_shared) {
struct address_space *mapping;
int dirtied;
- lock_page(dirty_page);
- dirtied = set_page_dirty(dirty_page);
- VM_BUG_ON_PAGE(PageAnon(dirty_page), dirty_page);
- mapping = dirty_page->mapping;
- unlock_page(dirty_page);
+ if (!page_mkwrite)
+ lock_page(old_page);
- if (dirtied && mapping) {
+ dirtied = set_page_dirty(old_page);
+ VM_BUG_ON_PAGE(PageAnon(old_page), old_page);
+ mapping = old_page->mapping;
+ unlock_page(old_page);
+ page_cache_release(old_page);
+
+ if ((dirtied || page_mkwrite) && mapping) {
/*
* Some device drivers do not set page.mapping
* but still dirty their pages
balance_dirty_pages_ratelimited(mapping);
}
- /* file_update_time outside page_lock */
- if (vma->vm_file)
+ if (!page_mkwrite)
file_update_time(vma->vm_file);
}
- put_page(dirty_page);
- if (page_mkwrite) {
- struct address_space *mapping = dirty_page->mapping;
-
- set_page_dirty(dirty_page);
- unlock_page(dirty_page);
- page_cache_release(dirty_page);
- if (mapping) {
- /*
- * Some device drivers do not set page.mapping
- * but still dirty their pages
- */
- balance_dirty_pages_ratelimited(mapping);
- }
- }
return ret;
}
}
}
- static inline void unmap_mapping_range_list(struct list_head *head,
- struct zap_details *details)
- {
- struct vm_area_struct *vma;
-
- /*
- * In nonlinear VMAs there is no correspondence between virtual address
- * offset and file offset. So we must perform an exhaustive search
- * across *all* the pages in each nonlinear VMA, not just the pages
- * whose virtual address lies outside the file truncation point.
- */
- list_for_each_entry(vma, head, shared.nonlinear) {
- details->nonlinear_vma = vma;
- unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
- }
- }
-
/**
- * unmap_mapping_range - unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file.
+ * unmap_mapping_range - unmap the portion of all mmaps in the specified
+ * address_space corresponding to the specified page range in the underlying
+ * file.
+ *
* @mapping: the address space containing mmaps to be unmapped.
* @holebegin: byte in first page to unmap, relative to the start of
* the underlying file. This will be rounded down to a PAGE_SIZE
}
details.check_mapping = even_cows? NULL: mapping;
- details.nonlinear_vma = NULL;
details.first_index = hba;
details.last_index = hba + hlen - 1;
if (details.last_index < details.first_index)
i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
unmap_mapping_range_tree(&mapping->i_mmap, &details);
- if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
- unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
i_mmap_unlock_write(mapping);
}
EXPORT_SYMBOL(unmap_mapping_range);
entry = mk_pte(page, vma->vm_page_prot);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
- else if (pte_file(*pte) && pte_file_soft_dirty(*pte))
- entry = pte_mksoft_dirty(entry);
if (anon) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
* if page by the offset is not ready to be mapped (cold cache or
* something).
*/
- if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) &&
- fault_around_bytes >> PAGE_SHIFT > 1) {
+ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
do_fault_around(vma, address, pte, pgoff, flags);
if (!pte_same(*pte, orig_pte))
balance_dirty_pages_ratelimited(mapping);
}
- /* file_update_time outside page_lock */
- if (vma->vm_file && !vma->vm_ops->page_mkwrite)
+ if (!vma->vm_ops->page_mkwrite)
file_update_time(vma->vm_file);
return ret;
* The mmap_sem may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
- static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags, pte_t orig_pte)
{
return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
}
- /*
- * Fault of a previously existing named mapping. Repopulate the pte
- * from the encoded file_pte if possible. This enables swappable
- * nonlinear vmas.
- *
- * We enter with non-exclusive mmap_sem (to exclude vma changes,
- * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with pte unmapped and unlocked.
- * The mmap_sem may have been released depending on flags and our
- * return value. See filemap_fault() and __lock_page_or_retry().
- */
- static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd,
- unsigned int flags, pte_t orig_pte)
- {
- pgoff_t pgoff;
-
- flags |= FAULT_FLAG_NONLINEAR;
-
- if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
- return 0;
-
- if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
- /*
- * Page table corrupted: show pte and kill process.
- */
- print_bad_pte(vma, address, orig_pte, NULL);
- return VM_FAULT_SIGBUS;
- }
-
- pgoff = pte_to_pgoff(orig_pte);
- if (!(flags & FAULT_FLAG_WRITE))
- return do_read_fault(mm, vma, address, pmd, pgoff, flags,
- orig_pte);
- if (!(vma->vm_flags & VM_SHARED))
- return do_cow_fault(mm, vma, address, pmd, pgoff, flags,
- orig_pte);
- return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
- }
-
static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
unsigned long addr, int page_nid,
int *flags)
if (pte_none(entry)) {
if (vma->vm_ops) {
if (likely(vma->vm_ops->fault))
- return do_linear_fault(mm, vma, address,
- pte, pmd, flags, entry);
+ return do_fault(mm, vma, address, pte,
+ pmd, flags, entry);
}
return do_anonymous_page(mm, vma, address,
pte, pmd, flags);
}
- if (pte_file(entry))
- return do_nonlinear_fault(mm, vma, address,
- pte, pmd, flags, entry);
return do_swap_page(mm, vma, address,
pte, pmd, flags, entry);
}