Merge tag 'powerpc-4.1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 625407e..b027a89 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -26,11 +26,14 @@ static void *real_vmalloc_addr(void *x)
 {
        unsigned long addr = (unsigned long) x;
        pte_t *p;
-
-       p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
+       /*
+        * Assume we don't have huge pages in vmalloc space, so we
+        * don't need to worry about THP collapse/split. This is
+        * called only in real mode, hence no need for irq_save/restore.
+        */
+       p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
        if (!p || !pte_present(*p))
                return NULL;
-       /* assume we don't have huge pages in vmalloc space... */
        addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
        return __va(addr);
 }
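
Real-mode code runs with the MMU off and cannot dereference vmalloc addresses directly, so real_vmalloc_addr() walks the kernel page table and returns the linear-map alias of the page instead. Elsewhere in this file the helper is used roughly as below (a sketch of the existing usage pattern, not a new API):

	/* kvm->arch.revmap is vmalloc'ed; translate the element's
	 * address to its linear-map alias before touching it with
	 * the MMU off. */
	struct revmap_entry *rev = &kvm->arch.revmap[pte_index];

	if (realmode)
		rev = real_vmalloc_addr(rev);
	rev->guest_rpte = g_ptel;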
@@ -131,31 +134,6 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
        unlock_rmap(rmap);
 }
 
-static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva,
-                             int writing, unsigned long *pte_sizep)
-{
-       pte_t *ptep;
-       unsigned long ps = *pte_sizep;
-       unsigned int hugepage_shift;
-
-       ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
-       if (!ptep)
-               return __pte(0);
-       if (hugepage_shift)
-               *pte_sizep = 1ul << hugepage_shift;
-       else
-               *pte_sizep = PAGE_SIZE;
-       if (ps > *pte_sizep)
-               return __pte(0);
-       return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
-}
-
-static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
-{
-       asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
-       hpte[0] = cpu_to_be64(hpte_v);
-}
-
 long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                       long pte_index, unsigned long pteh, unsigned long ptel,
                       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
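
The unlock_hpte() removed above is superseded by shared helpers, presumably now in arch/powerpc/include/asm/kvm_book3s_64.h. A sketch of their likely shape, inferred from the __unlock_hpte() callers below (note both variants clear HPTE_V_HVLOCK themselves, which is why callers pass the raw first doubleword rather than masking it):

	/* Unlock with release semantics: order all earlier HPTE stores
	 * before the store that drops the lock bit. */
	static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
	{
		hpte_v &= ~HPTE_V_HVLOCK;
		asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
		hpte[0] = cpu_to_be64(hpte_v);
	}

	/* Barrier-less variant for callers that provide their own
	 * ordering, e.g. an explicit eieio/ptesync pair. */
	static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
	{
		hpte_v &= ~HPTE_V_HVLOCK;
		hpte[0] = cpu_to_be64(hpte_v);
	}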
@@ -166,13 +144,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
        struct revmap_entry *rev;
        unsigned long g_ptel;
        struct kvm_memory_slot *memslot;
-       unsigned long pte_size;
+       unsigned int hpage_shift;
        unsigned long is_io;
        unsigned long *rmap;
-       pte_t pte;
+       pte_t *ptep;
        unsigned int writing;
        unsigned long mmu_seq;
-       unsigned long rcbits;
+       unsigned long rcbits, irq_flags = 0;
 
        psize = hpte_page_size(pteh, ptel);
        if (!psize)
@@ -208,22 +186,46 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 
        /* Translate to host virtual address */
        hva = __gfn_to_hva_memslot(memslot, gfn);
-
-       /* Look up the Linux PTE for the backing page */
-       pte_size = psize;
-       pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
-       if (pte_present(pte) && !pte_protnone(pte)) {
-               if (writing && !pte_write(pte))
-                       /* make the actual HPTE be read-only */
-                       ptel = hpte_make_readonly(ptel);
-               is_io = hpte_cache_bits(pte_val(pte));
-               pa = pte_pfn(pte) << PAGE_SHIFT;
-               pa |= hva & (pte_size - 1);
-               pa |= gpa & ~PAGE_MASK;
+       /*
+        * If the page tables changed after this lookup, we would
+        * retry via mmu_notifier_retry.
+        */
+       if (realmode) {
+               ptep = __find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
+       } else {
+               local_irq_save(irq_flags);
+               ptep = find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
        }
+       if (ptep) {
+               pte_t pte;
+               unsigned int host_pte_size;
 
-       if (pte_size < psize)
-               return H_PARAMETER;
+               if (hpage_shift)
+                       host_pte_size = 1ul << hpage_shift;
+               else
+                       host_pte_size = PAGE_SIZE;
+               /*
+                * The guest page size should always be <= the host
+                * page size, even when the host is using hugepages.
+                */
+               if (host_pte_size < psize) {
+                       if (!realmode)
+                               local_irq_restore(irq_flags);
+                       return H_PARAMETER;
+               }
+               pte = kvmppc_read_update_linux_pte(ptep, writing);
+               if (pte_present(pte) && !pte_protnone(pte)) {
+                       if (writing && !pte_write(pte))
+                               /* make the actual HPTE be read-only */
+                               ptel = hpte_make_readonly(ptel);
+                       is_io = hpte_cache_bits(pte_val(pte));
+                       pa = pte_pfn(pte) << PAGE_SHIFT;
+                       pa |= hva & (host_pte_size - 1);
+                       pa |= gpa & ~PAGE_MASK;
+               }
+       }
+       if (!realmode)
+               local_irq_restore(irq_flags);
 
        ptel &= ~(HPTE_R_PP0 - psize);
        ptel |= pa;
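
The physical-address assembly above is worth a worked example. With assumed values (not from the patch), let the host back the guest page with a 16MB hugepage, so hpage_shift is 24 and the low 24 bits of the host virtual address give the offset within it:

	unsigned long hva = 0x3fff8123000UL;	 /* host virtual address   */
	unsigned long gpa = 0x45123000UL;	 /* guest physical address */
	unsigned long host_pte_size = 1UL << 24; /* 16MB >= psize, so OK   */
	unsigned long pa;

	pa  = 0x20000000UL;		 /* pte_pfn(pte) << PAGE_SHIFT:
					  * the 16MB-aligned hugepage base */
	pa |= hva & (host_pte_size - 1); /* 0x123000: offset in hugepage   */
	pa |= gpa & ~PAGE_MASK;		 /* sub-4K offset from the guest   */
	/* pa == 0x20123000 */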
@@ -271,10 +273,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                                u64 pte;
                                while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                                        cpu_relax();
-                               pte = be64_to_cpu(*hpte);
+                               pte = be64_to_cpu(hpte[0]);
                                if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
                                        break;
-                               *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
+                               __unlock_hpte(hpte, pte);
                                hpte += 2;
                        }
                        if (i == 8)
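
For reference, try_lock_hpte() and __unlock_hpte() both operate on the HPTE_V_HVLOCK bit in the first HPTE doubleword. The real helper is ldarx/stdcx. inline assembly in kvm_book3s_64.h; its semantics are roughly the cmpxchg sketch below (try_lock_hpte_sketch is illustrative, not the kernel helper):

	/* Atomically set HPTE_V_HVLOCK iff none of 'bits' (which
	 * include HPTE_V_HVLOCK) are already set; nonzero on success. */
	static inline long try_lock_hpte_sketch(__be64 *hpte, unsigned long bits)
	{
		__be64 old = READ_ONCE(hpte[0]);

		if (be64_to_cpu(old) & bits)
			return 0;	/* already locked, or in use */
		return cmpxchg(&hpte[0], old,
			       cpu_to_be64(be64_to_cpu(old) | HPTE_V_HVLOCK)) == old;
	}

The free-slot scan in kvmppc_do_h_enter passes HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT, so it only claims entries that are simultaneously unlocked and empty.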
@@ -290,9 +292,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 
                        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                                cpu_relax();
-                       pte = be64_to_cpu(*hpte);
+                       pte = be64_to_cpu(hpte[0]);
                        if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
-                               *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
+                               __unlock_hpte(hpte, pte);
                                return H_PTEG_FULL;
                        }
                }
@@ -331,7 +333,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 
        /* Write the first HPTE dword, unlocking the HPTE and making it valid */
        eieio();
-       hpte[0] = cpu_to_be64(pteh);
+       __unlock_hpte(hpte, pteh);
        asm volatile("ptesync" : : : "memory");
 
        *pte_idx_ret = pte_index;
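
This call site is why a barrier-less __unlock_hpte() suffices: the surrounding eieio/ptesync pair already provides the ordering. Spelled out (a restatement of the code above, using the helper shape sketched earlier):

	hpte[1] = cpu_to_be64(ptel);	/* payload doubleword first      */
	eieio();			/* order hpte[1] before hpte[0]  */
	__unlock_hpte(hpte, pteh);	/* set valid, drop HPTE_V_HVLOCK */
	asm volatile("ptesync" : : : "memory");	/* wait until visible to
						 * the HW page-table walker */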
@@ -412,7 +414,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
        if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
            ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
            ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
-               hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+               __unlock_hpte(hpte, pte);
                return H_NOT_FOUND;
        }
 
@@ -548,7 +550,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
                                be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
                        rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
                        args[j] |= rcbits << (56 - 5);
-                       hp[0] = 0;
+                       __unlock_hpte(hp, 0);
                }
        }
 
@@ -574,7 +576,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
        pte = be64_to_cpu(hpte[0]);
        if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
            ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
-               hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+               __unlock_hpte(hpte, pte);
                return H_NOT_FOUND;
        }
 
@@ -755,8 +757,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
                                /* Return with the HPTE still locked */
                                return (hash << 3) + (i >> 1);
 
-                       /* Unlock and move on */
-                       hpte[i] = cpu_to_be64(v);
+                       __unlock_hpte(&hpte[i], v);
                }
 
                if (val & HPTE_V_SECONDARY)