// SPDX-License-Identifier: GPL-2.0
/*
 * PPC64 Huge TLB Page Support for hash based MMUs (POWER4 and later)
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/machdep.h>

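/*
 * Default huge page shift (log2 of the default hugepage size).  It is
 * picked by hugetlbpage_init_default() at the bottom of this file from
 * the page sizes the MMU actually supports.
 */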
unsigned int hpage_shift;
EXPORT_SYMBOL(hpage_shift);

extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
				  unsigned long pa, unsigned long rflags,
				  unsigned long vflags, int psize, int ssize);

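/*
 * Build or update the hash page table entry (HPTE) backing a hugetlb
 * PTE.  The return convention matches the other __hash_page_* variants:
 * 0 means the access was handled (or should simply be retried), 1 means
 * a page fault must be taken, and -1 signals a hypervisor failure while
 * inserting the HPTE.
 */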
int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
		     pte_t *ptep, unsigned long trap, unsigned long flags,
		     int ssize, unsigned int shift, unsigned int mmu_psize)
{
	real_pte_t rpte;
	unsigned long vpn;
	unsigned long old_pte, new_pte;
	unsigned long rflags, pa;
	long slot, offset;

	BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);

	/* Search the Linux page table for a match with va */
	vpn = hpt_vpn(ea, vsid, ssize);

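	/*
	 * vpn is the full virtual page number (VSID plus the page index
	 * within the segment): it is what the hash function and the HPTE
	 * tag are computed from, not the raw effective address.
	 */
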
	/*
	 * At this point, we have a pte (old_pte) which can be used to build
	 * or update an HPTE. There are 2 cases:
	 *
	 * 1. There is a valid (present) pte with no associated HPTE (this is
	 *	the most common case)
	 * 2. There is a valid (present) pte with an associated HPTE. The
	 *	current values of the pp bits in the HPTE prevent access
	 *	because we are doing software DIRTY bit management and the
	 *	page is currently not DIRTY.
	 */
	do {
		old_pte = pte_val(*ptep);
		/* If PTE busy, retry the access */
		if (unlikely(old_pte & H_PAGE_BUSY))
			return 0;
		/* If PTE permissions don't match, take page fault */
		if (unlikely(!check_pte_access(access, old_pte)))
			return 1;

		/*
		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
		 * a write access
		 */
		new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
		if (access & _PAGE_WRITE)
			new_pte |= _PAGE_DIRTY;
	} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));

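	/*
	 * From here on we own H_PAGE_BUSY: a concurrent hash fault on this
	 * PTE bails out above and retries, so the HPTE update below is
	 * effectively serialized until the final store clears the bit.
	 */
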
	/* Make sure this is a hugetlb entry */
	if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))
		return 0;

	rflags = htab_convert_pte_flags(new_pte);
	if (unlikely(mmu_psize == MMU_PAGE_16G))
		offset = PTRS_PER_PUD;
	else
		offset = PTRS_PER_PMD;
	rpte = __real_pte(__pte(old_pte), ptep, offset);

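	/*
	 * With a 64K base page size the real_pte also carries the HPTE
	 * slot hints stored @offset entries past the PTE itself; 16G pages
	 * are mapped at the PUD level while the other hugepage sizes live
	 * at the PMD level, hence the two different offsets above.
	 */
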
	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
		/*
		 * No CPU has hugepages but lacks no-execute, so we
		 * don't need to worry about that case
		 */
		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);

	/* Check if pte already has an hpte (case 2) */
	if (unlikely(old_pte & H_PAGE_HASHPTE)) {
		/* There MIGHT be an HPTE for this pte */
		unsigned long gslot;

		gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
		if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize,
					       mmu_psize, ssize, flags) == -1)
			old_pte &= ~_PAGE_HPTEFLAGS;
	}

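	/*
	 * A -1 from hpte_updatepp() means no matching HPTE was found (it
	 * may have been evicted), in which case the stale slot hints were
	 * cleared above and a fresh HPTE is inserted below.
	 */
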
	if (likely(!(old_pte & H_PAGE_HASHPTE))) {
		unsigned long hash = hpt_hash(vpn, shift, ssize);

		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;

		/* clear HPTE slot information in new PTE */
		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;

		slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
					     mmu_psize, ssize);

		/*
		 * Hypervisor failure. Restore old pte and return -1
		 * similar to __hash_page_*
		 */
		if (unlikely(slot == -2)) {
			*ptep = __pte(old_pte);
			hash_failure_debug(ea, access, vsid, trap, ssize,
					   mmu_psize, mmu_psize, old_pte);
			return -1;
		}

		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
	}

	/*
	 * No need to use ldarx/stdcx here: we still hold H_PAGE_BUSY,
	 * so a plain store suffices to publish the new PTE and release
	 * the busy bit in one go.
	 */
	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
	return 0;
}

pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
				  unsigned long addr, pte_t *ptep)
{
	unsigned long pte_val;
	/*
	 * Clear the _PAGE_PRESENT so that no hardware parallel update is
	 * possible. Also keep the pte_present true so that we don't take
	 * a wrong fault.
	 */
	pte_val = pte_update(vma->vm_mm, addr, ptep,
			     _PAGE_PRESENT, _PAGE_INVALID, 1);

	return __pte(pte_val);
}

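/*
 * Pair of huge_ptep_modify_prot_start() above: write the final PTE
 * value.  A radix MMU has its own variant; for hash, a plain
 * set_huge_pte_at() is enough because start() already blocked hardware
 * updates by clearing _PAGE_PRESENT.
 */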
void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
				  pte_t *ptep, pte_t old_pte, pte_t pte)
{
	if (radix_enabled())
		return radix__huge_ptep_modify_prot_commit(vma, addr, ptep,
							   old_pte, pte);
	set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}

void hugetlbpage_init_default(void)
{
	/* Set default large page size. Currently, we pick 16M or 1M
	 * depending on what is available
	 */
	if (mmu_psize_defs[MMU_PAGE_16M].shift)
		hpage_shift = mmu_psize_defs[MMU_PAGE_16M].shift;
	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
		hpage_shift = mmu_psize_defs[MMU_PAGE_1M].shift;
	else if (mmu_psize_defs[MMU_PAGE_2M].shift)
		hpage_shift = mmu_psize_defs[MMU_PAGE_2M].shift;
}