s390/mm: optimize page table upgrade routine
authorAlexander Gordeev <agordeev@linux.ibm.com>
Sun, 8 Mar 2020 20:34:49 +0000 (21:34 +0100)
committerVasily Gorbik <gor@linux.ibm.com>
Mon, 23 Mar 2020 12:41:53 +0000 (13:41 +0100)
At most two new tables are allocated on a page table
upgrade. Because that bound is known, the loop the current
implementation is based on can be unrolled, with some improvements:

  * upgrade from 3 to 5 levels happens in one go - without an
    unnecessary re-take of page_table_lock in-between;

  * page table initialization is moved out of the atomic code;

Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
arch/s390/mm/pgalloc.c

index 3dd253f..d3be3fe 100644 (file)
@@ -77,43 +77,65 @@ static void __crst_table_upgrade(void *arg)
 
 int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 {
-       unsigned long *table, *pgd;
-       int rc, notify;
+       unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
+       unsigned long asce_limit = mm->context.asce_limit;
 
        /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
-       VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
-       rc = 0;
-       notify = 0;
-       while (mm->context.asce_limit < end) {
-               table = crst_table_alloc(mm);
-               if (!table) {
-                       rc = -ENOMEM;
-                       break;
-               }
-               spin_lock_bh(&mm->page_table_lock);
-               pgd = (unsigned long *) mm->pgd;
-               if (mm->context.asce_limit == _REGION2_SIZE) {
-                       crst_table_init(table, _REGION2_ENTRY_EMPTY);
-                       p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
-                       mm->pgd = (pgd_t *) table;
-                       mm->context.asce_limit = _REGION1_SIZE;
-                       mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-                               _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
-                       mm_inc_nr_puds(mm);
-               } else {
-                       crst_table_init(table, _REGION1_ENTRY_EMPTY);
-                       pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
-                       mm->pgd = (pgd_t *) table;
-                       mm->context.asce_limit = -PAGE_SIZE;
-                       mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-                               _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
-               }
-               notify = 1;
-               spin_unlock_bh(&mm->page_table_lock);
+       VM_BUG_ON(asce_limit < _REGION2_SIZE);
+
+       if (end <= asce_limit)
+               return 0;
+
+       if (asce_limit == _REGION2_SIZE) {
+               p4d = crst_table_alloc(mm);
+               if (unlikely(!p4d))
+                       goto err_p4d;
+               crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
        }
-       if (notify)
-               on_each_cpu(__crst_table_upgrade, mm, 0);
-       return rc;
+       if (end > _REGION1_SIZE) {
+               pgd = crst_table_alloc(mm);
+               if (unlikely(!pgd))
+                       goto err_pgd;
+               crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
+       }
+
+       spin_lock_bh(&mm->page_table_lock);
+
+       /*
+        * This routine gets called with mmap_sem lock held and there is
+        * no reason to optimize for the case of otherwise. However, if
+        * that would ever change, the below check will let us know.
+        */
+       VM_BUG_ON(asce_limit != mm->context.asce_limit);
+
+       if (p4d) {
+               __pgd = (unsigned long *) mm->pgd;
+               p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
+               mm->pgd = (pgd_t *) p4d;
+               mm->context.asce_limit = _REGION1_SIZE;
+               mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+                       _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+               mm_inc_nr_puds(mm);
+       }
+       if (pgd) {
+               __pgd = (unsigned long *) mm->pgd;
+               pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd);
+               mm->pgd = (pgd_t *) pgd;
+               mm->context.asce_limit = -PAGE_SIZE;
+               mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+                       _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+       }
+
+       spin_unlock_bh(&mm->page_table_lock);
+
+       on_each_cpu(__crst_table_upgrade, mm, 0);
+
+       return 0;
+
+err_pgd:
+       crst_table_free(mm, p4d);
+err_p4d:
+       return -ENOMEM;
 }
 
 void crst_table_downgrade(struct mm_struct *mm)