mm, compaction: ignore pageblock skip when manually invoking compaction
[platform/adaptation/renesas_rcar/renesas_kernel.git] / mm / shmem.c
index 902a148..f0d698b 100644 (file)
@@ -45,7 +45,7 @@ static struct vfsmount *shm_mnt;
 #include <linux/xattr.h>
 #include <linux/exportfs.h>
 #include <linux/posix_acl.h>
-#include <linux/generic_acl.h>
+#include <linux/posix_acl_xattr.h>
 #include <linux/mman.h>
 #include <linux/string.h>
 #include <linux/slab.h>
@@ -80,11 +80,12 @@ static struct vfsmount *shm_mnt;
 #define SHORT_SYMLINK_LEN 128
 
 /*
- * shmem_fallocate and shmem_writepage communicate via inode->i_private
- * (with i_mutex making sure that it has only one user at a time):
- * we would prefer not to enlarge the shmem inode just for that.
+ * shmem_fallocate communicates with shmem_fault or shmem_writepage via
+ * inode->i_private (with i_mutex making sure that it has only one user at
+ * a time): we would prefer not to enlarge the shmem inode just for that.
  */
 struct shmem_falloc {
+       wait_queue_head_t *waitq; /* faults into hole wait for punch to end */
        pgoff_t start;          /* start of range currently being fallocated */
        pgoff_t next;           /* the next page offset to be fallocated */
        pgoff_t nr_falloced;    /* how many new pages have been fallocated */
@@ -285,8 +286,8 @@ static int shmem_add_to_page_cache(struct page *page,
 {
        int error;
 
-       VM_BUG_ON(!PageLocked(page));
-       VM_BUG_ON(!PageSwapBacked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+       VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
 
        page_cache_get(page);
        page->mapping = mapping;
@@ -491,7 +492,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                                continue;
                        if (!unfalloc || !PageUptodate(page)) {
                                if (page->mapping == mapping) {
-                                       VM_BUG_ON(PageWriteback(page));
+                                       VM_BUG_ON_PAGE(PageWriteback(page), page);
                                        truncate_inode_page(mapping, page);
                                }
                        }
@@ -533,22 +534,19 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                return;
 
        index = start;
-       for ( ; ; ) {
+       while (index < end) {
                cond_resched();
                pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
                                min(end - index, (pgoff_t)PAGEVEC_SIZE),
                                                        pvec.pages, indices);
                if (!pvec.nr) {
-                       if (index == start || unfalloc)
+                       /* If all gone or hole-punch or unfalloc, we're done */
+                       if (index == start || end != -1)
                                break;
+                       /* But if truncating, restart to make sure all gone */
                        index = start;
                        continue;
                }
-               if ((index == start || unfalloc) && indices[0] >= end) {
-                       shmem_deswap_pagevec(&pvec);
-                       pagevec_release(&pvec);
-                       break;
-               }
                mem_cgroup_uncharge_start();
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
@@ -560,16 +558,25 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                        if (radix_tree_exceptional_entry(page)) {
                                if (unfalloc)
                                        continue;
-                               nr_swaps_freed += !shmem_free_swap(mapping,
-                                                               index, page);
+                               if (shmem_free_swap(mapping, index, page)) {
+                                       /* Swap was replaced by page: retry */
+                                       index--;
+                                       break;
+                               }
+                               nr_swaps_freed++;
                                continue;
                        }
 
                        lock_page(page);
                        if (!unfalloc || !PageUptodate(page)) {
                                if (page->mapping == mapping) {
-                                       VM_BUG_ON(PageWriteback(page));
+                                       VM_BUG_ON_PAGE(PageWriteback(page), page);
                                        truncate_inode_page(mapping, page);
+                               } else {
+                                       /* Page was replaced by swap: retry */
+                                       unlock_page(page);
+                                       index--;
+                                       break;
                                }
                        }
                        unlock_page(page);
@@ -620,10 +627,8 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
        }
 
        setattr_copy(inode, attr);
-#ifdef CONFIG_TMPFS_POSIX_ACL
        if (attr->ia_valid & ATTR_MODE)
-               error = generic_acl_chmod(inode);
-#endif
+               error = posix_acl_chmod(inode, inode->i_mode);
        return error;
 }
 
@@ -826,6 +831,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
                        spin_lock(&inode->i_lock);
                        shmem_falloc = inode->i_private;
                        if (shmem_falloc &&
+                           !shmem_falloc->waitq &&
                            index >= shmem_falloc->start &&
                            index < shmem_falloc->next)
                                shmem_falloc->nr_unswapped++;
@@ -1300,6 +1306,64 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        int error;
        int ret = VM_FAULT_LOCKED;
 
+       /*
+        * Trinity finds that probing a hole which tmpfs is punching can
+        * prevent the hole-punch from ever completing: which in turn
+        * locks writers out with its hold on i_mutex.  So refrain from
+        * faulting pages into the hole while it's being punched.  Although
+        * shmem_undo_range() does remove the additions, it may be unable to
+        * keep up, as each new page needs its own unmap_mapping_range() call,
+        * and the i_mmap tree grows ever slower to scan if new vmas are added.
+        *
+        * It does not matter if we sometimes reach this check just before the
+        * hole-punch begins, so that one fault then races with the punch:
+        * we just need to make racing faults a rare case.
+        *
+        * The implementation below would be much simpler if we just used a
+        * standard mutex or completion: but we cannot take i_mutex in fault,
+        * and bloating every shmem inode for this unlikely case would be sad.
+        */
+       if (unlikely(inode->i_private)) {
+               struct shmem_falloc *shmem_falloc;
+
+               spin_lock(&inode->i_lock);
+               shmem_falloc = inode->i_private;
+               if (shmem_falloc &&
+                   shmem_falloc->waitq &&
+                   vmf->pgoff >= shmem_falloc->start &&
+                   vmf->pgoff < shmem_falloc->next) {
+                       wait_queue_head_t *shmem_falloc_waitq;
+                       DEFINE_WAIT(shmem_fault_wait);
+
+                       ret = VM_FAULT_NOPAGE;
+                       if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) &&
+                          !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+                               /* It's polite to up mmap_sem if we can */
+                               up_read(&vma->vm_mm->mmap_sem);
+                               ret = VM_FAULT_RETRY;
+                       }
+
+                       shmem_falloc_waitq = shmem_falloc->waitq;
+                       prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
+                                       TASK_UNINTERRUPTIBLE);
+                       spin_unlock(&inode->i_lock);
+                       schedule();
+
+                       /*
+                        * shmem_falloc_waitq points into the shmem_fallocate()
+                        * stack of the hole-punching task, so it is usually
+                        * invalid by the time we reach here; finish_wait()
+                        * does not dereference it in that case.  We still take
+                        * i_lock so as not to race with wake_up_all().
+                        */
+                       spin_lock(&inode->i_lock);
+                       finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
+                       spin_unlock(&inode->i_lock);
+                       return ret;
+               }
+               spin_unlock(&inode->i_lock);
+       }
+
        error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
        if (error)
                return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
@@ -1819,12 +1883,25 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
                struct address_space *mapping = file->f_mapping;
                loff_t unmap_start = round_up(offset, PAGE_SIZE);
                loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
+               DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
+
+               shmem_falloc.waitq = &shmem_falloc_waitq;
+               shmem_falloc.start = unmap_start >> PAGE_SHIFT;
+               shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
+               spin_lock(&inode->i_lock);
+               inode->i_private = &shmem_falloc;
+               spin_unlock(&inode->i_lock);
 
                if ((u64)unmap_end > (u64)unmap_start)
                        unmap_mapping_range(mapping, unmap_start,
                                            1 + unmap_end - unmap_start, 0);
                shmem_truncate_range(inode, offset, offset + len - 1);
                /* No need to unmap again: hole-punching leaves COWed pages */
+
+               spin_lock(&inode->i_lock);
+               inode->i_private = NULL;
+               wake_up_all(&shmem_falloc_waitq);
+               spin_unlock(&inode->i_lock);
                error = 0;
                goto out;
        }
@@ -1842,6 +1919,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
                goto out;
        }
 
+       shmem_falloc.waitq = NULL;
        shmem_falloc.start = start;
        shmem_falloc.next  = start;
        shmem_falloc.nr_falloced = 0;
@@ -1937,22 +2015,14 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 
        inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
        if (inode) {
-#ifdef CONFIG_TMPFS_POSIX_ACL
-               error = generic_acl_init(inode, dir);
-               if (error) {
-                       iput(inode);
-                       return error;
-               }
-#endif
+               error = simple_acl_create(dir, inode);
+               if (error)
+                       goto out_iput;
                error = security_inode_init_security(inode, dir,
                                                     &dentry->d_name,
                                                     shmem_initxattrs, NULL);
-               if (error) {
-                       if (error != -EOPNOTSUPP) {
-                               iput(inode);
-                               return error;
-                       }
-               }
+               if (error && error != -EOPNOTSUPP)
+                       goto out_iput;
 
                error = 0;
                dir->i_size += BOGO_DIRENT_SIZE;
@@ -1961,6 +2031,9 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
                dget(dentry); /* Extra count - pin the dentry in core */
        }
        return error;
+out_iput:
+       iput(inode);
+       return error;
 }
 
 static int
@@ -1974,24 +2047,17 @@ shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
                error = security_inode_init_security(inode, dir,
                                                     NULL,
                                                     shmem_initxattrs, NULL);
-               if (error) {
-                       if (error != -EOPNOTSUPP) {
-                               iput(inode);
-                               return error;
-                       }
-               }
-#ifdef CONFIG_TMPFS_POSIX_ACL
-               error = generic_acl_init(inode, dir);
-               if (error) {
-                       iput(inode);
-                       return error;
-               }
-#else
-               error = 0;
-#endif
+               if (error && error != -EOPNOTSUPP)
+                       goto out_iput;
+               error = simple_acl_create(dir, inode);
+               if (error)
+                       goto out_iput;
                d_tmpfile(dentry, inode);
        }
        return error;
+out_iput:
+       iput(inode);
+       return error;
 }
 
 static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
@@ -2077,8 +2143,10 @@ static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct
 
        if (new_dentry->d_inode) {
                (void) shmem_unlink(new_dir, new_dentry);
-               if (they_are_dirs)
+               if (they_are_dirs) {
+                       drop_nlink(new_dentry->d_inode);
                        drop_nlink(old_dir);
+               }
        } else if (they_are_dirs) {
                drop_nlink(old_dir);
                inc_nlink(new_dir);
@@ -2223,8 +2291,8 @@ static int shmem_initxattrs(struct inode *inode,
 
 static const struct xattr_handler *shmem_xattr_handlers[] = {
 #ifdef CONFIG_TMPFS_POSIX_ACL
-       &generic_acl_access_handler,
-       &generic_acl_default_handler,
+       &posix_acl_access_xattr_handler,
+       &posix_acl_default_xattr_handler,
 #endif
        NULL
 };
@@ -2740,6 +2808,7 @@ static const struct inode_operations shmem_inode_operations = {
        .getxattr       = shmem_getxattr,
        .listxattr      = shmem_listxattr,
        .removexattr    = shmem_removexattr,
+       .set_acl        = simple_set_acl,
 #endif
 };
 
@@ -2764,6 +2833,7 @@ static const struct inode_operations shmem_dir_inode_operations = {
 #endif
 #ifdef CONFIG_TMPFS_POSIX_ACL
        .setattr        = shmem_setattr,
+       .set_acl        = simple_set_acl,
 #endif
 };
 
@@ -2776,6 +2846,7 @@ static const struct inode_operations shmem_special_inode_operations = {
 #endif
 #ifdef CONFIG_TMPFS_POSIX_ACL
        .setattr        = shmem_setattr,
+       .set_acl        = simple_set_acl,
 #endif
 };