arch/powerpc/mm/hugetlb: NestMMU workaround for hugetlb mprotect RW upgrade
author: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Tue, 5 Mar 2019 23:46:40 +0000 (15:46 -0800)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Wed, 6 Mar 2019 05:07:18 +0000 (21:07 -0800)
NestMMU requires us to mark the pte invalid and flush the tlb when we do
a RW upgrade of pte.  We fixed a variant of this in the fault path in
bd5050e38aec ("powerpc/mm/radix: Change pte relax sequence to handle
nest MMU hang").

Link: http://lkml.kernel.org/r/20190116085035.29729-6-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reviewed-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/powerpc/include/asm/book3s/64/hugetlb.h
arch/powerpc/mm/hugetlbpage-hash64.c
arch/powerpc/mm/hugetlbpage-radix.c

index 5b01777..66c1e4f 100644 (file)
@@ -13,6 +13,10 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                                unsigned long len, unsigned long pgoff,
                                unsigned long flags);
 
+/*
+ * Radix-specific commit step for a hugetlb pte protection change; it is
+ * defined alongside radix__hugetlb_get_unmapped_area().
+ */
+extern void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+                                               unsigned long addr, pte_t *ptep,
+                                               pte_t old_pte, pte_t pte);
+
 static inline int hstate_get_psize(struct hstate *hstate)
 {
        unsigned long shift;
@@ -42,4 +46,12 @@ static inline bool gigantic_page_supported(void)
 /* hugepd entry valid bit */
 #define HUGEPD_VAL_BITS                (0x8000000000000000UL)
 
+/*
+ * Self-named macro: presumably lets generic hugetlb code detect with
+ * #ifdef/#ifndef that the architecture supplies this hook - confirm
+ * against the generic hugetlb header.
+ */
+#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
+extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+                                        unsigned long addr, pte_t *ptep);
+
+/* Same self-named macro pattern for the commit half of the pair. */
+#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
+extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+                                        unsigned long addr, pte_t *ptep,
+                                        pte_t old_pte, pte_t new_pte);
 #endif
index 2e6a8f9..367ce3a 100644 (file)
@@ -121,3 +121,28 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
        *ptep = __pte(new_pte & ~H_PAGE_BUSY);
        return 0;
 }
+
+/*
+ * Open a protection-change window on the hugetlb pte at @ptep and return
+ * the value handed back by pte_update(), wrapped as a pte_t. The result is
+ * presumably the pre-update pte contents - confirm against pte_update().
+ */
+pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+                                 unsigned long addr, pte_t *ptep)
+{
+       /*
+        * _PAGE_PRESENT is cleared so that hardware cannot update the entry
+        * in parallel. _PAGE_INVALID is passed as well so that pte_present()
+        * still reports true and no wrong fault is taken on this address.
+        */
+       return __pte(pte_update(vma->vm_mm, addr, ptep,
+                               _PAGE_PRESENT, _PAGE_INVALID, 1));
+}
+
+/*
+ * Finish a hugetlb pte protection change by installing @pte at @ptep.
+ * With radix enabled the work is delegated to
+ * radix__huge_ptep_modify_prot_commit(), which also receives @old_pte;
+ * otherwise the entry is written directly with set_huge_pte_at().
+ */
+void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+                                 pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+       if (radix_enabled())
+               radix__huge_ptep_modify_prot_commit(vma, addr, ptep,
+                                                   old_pte, pte);
+       else
+               set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+}
index 2486bee..11d9ea2 100644 (file)
@@ -90,3 +90,20 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 
        return vm_unmapped_area(&info);
 }
+
+/*
+ * Radix variant of the hugetlb modify-prot commit step: optionally flush
+ * the hugetlb TLB entry for @addr, then install @pte at @ptep.
+ */
+void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+                                        unsigned long addr, pte_t *ptep,
+                                        pte_t old_pte, pte_t pte)
+{
+       struct mm_struct *mm = vma->vm_mm;
+
+       /*
+        * Relaxing access can hang the NMMU unless the tlb is flushed
+        * before the new value is set. The flush is only issued when the
+        * change is a RW upgrade and mm->context.copros is above zero.
+        */
+       if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte))) {
+               if (atomic_read(&mm->context.copros) > 0)
+                       radix__flush_hugetlb_page(vma, addr);
+       }
+
+       set_huge_pte_at(mm, addr, ptep, pte);
+}