// SPDX-License-Identifier: GPL-2.0
/*
 * SPARC64 Huge TLB page support.
 *
 * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/sysctl.h>

#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>

/* Slightly simplified from the non-hugepage variant because by
 * definition we don't have to worry about any page coloring stuff.
 */

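/*
 * sparc64 has an unusable hole in the middle of the virtual address
 * space (VA_EXCLUDE_START to VA_EXCLUDE_END).  Search for a slot below
 * the hole first; a full 64-bit task can retry above it on failure.
 */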
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
							unsigned long addr,
							unsigned long len,
							unsigned long pgoff,
							unsigned long flags)
{
	struct hstate *h = hstate_file(filp);
	unsigned long task_size = TASK_SIZE;
	struct vm_unmapped_area_info info;

	if (test_thread_flag(TIF_32BIT))
		task_size = STACK_TOP32;

	info.flags = 0;
	info.length = len;
	info.low_limit = TASK_UNMAPPED_BASE;
	info.high_limit = min(task_size, VA_EXCLUDE_START);
	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	addr = vm_unmapped_area(&info);

	if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
		VM_BUG_ON(addr != -ENOMEM);
		info.low_limit = VA_EXCLUDE_END;
		info.high_limit = task_size;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}

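/*
 * Only 32-bit tasks ever use the top-down mmap layout on sparc64,
 * hence the BUG_ON() below; 64-bit tasks always take the bottom-up
 * path above.
 */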
static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
				  const unsigned long len,
				  const unsigned long pgoff,
				  const unsigned long flags)
{
	struct hstate *h = hstate_file(filp);
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0;
	struct vm_unmapped_area_info info;

	/* This should only ever run for 32-bit processes.  */
	BUG_ON(!test_thread_flag(TIF_32BIT));

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = PAGE_SIZE;
	info.high_limit = mm->mmap_base;
	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	addr = vm_unmapped_area(&info);

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	if (addr & ~PAGE_MASK) {
		VM_BUG_ON(addr != -ENOMEM);
		info.flags = 0;
		info.low_limit = TASK_UNMAPPED_BASE;
		info.high_limit = STACK_TOP32;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}

unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long task_size = TASK_SIZE;

	if (test_thread_flag(TIF_32BIT))
		task_size = STACK_TOP32;

	if (len & ~huge_page_mask(h))
		return -EINVAL;
	if (len > task_size)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		addr = ALIGN(addr, huge_page_size(h));
		vma = find_vma(mm, addr);
		if (task_size - len >= addr &&
		    (!vma || addr + len <= vm_start_gap(vma)))
			return addr;
	}
	if (mm->get_unmapped_area == arch_get_unmapped_area)
		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
				pgoff, flags);
	else
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
				pgoff, flags);
}

static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	return entry;
}

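/*
 * sun4v TTEs encode the page size in the _PAGE_SZALL_4V field;
 * _PAGE_PMD_HUGE/_PAGE_PUD_HUGE additionally tell the page table code
 * at which level the huge mapping lives.
 */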
static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	unsigned long hugepage_size = _PAGE_SZ4MB_4V;

	pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;

	switch (shift) {
	case HPAGE_16GB_SHIFT:
		hugepage_size = _PAGE_SZ16GB_4V;
		pte_val(entry) |= _PAGE_PUD_HUGE;
		break;
	case HPAGE_2GB_SHIFT:
		hugepage_size = _PAGE_SZ2GB_4V;
		pte_val(entry) |= _PAGE_PMD_HUGE;
		break;
	case HPAGE_256MB_SHIFT:
		hugepage_size = _PAGE_SZ256MB_4V;
		pte_val(entry) |= _PAGE_PMD_HUGE;
		break;
	case HPAGE_SHIFT:
		pte_val(entry) |= _PAGE_PMD_HUGE;
		break;
	case HPAGE_64K_SHIFT:
		hugepage_size = _PAGE_SZ64K_4V;
		break;
	default:
		WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift);
	}

	pte_val(entry) = pte_val(entry) | hugepage_size;
	return entry;
}

static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	if (tlb_type == hypervisor)
		return sun4v_hugepage_shift_to_tte(entry, shift);
	else
		return sun4u_hugepage_shift_to_tte(entry, shift);
}

pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
			 struct page *page, int writeable)
{
	unsigned int shift = huge_page_shift(hstate_vma(vma));
	pte_t pte;

	pte = hugepage_shift_to_tte(entry, shift);

#ifdef CONFIG_SPARC64
	/* If this vma has ADI enabled on it, turn on TTE.mcd. */
	if (vma->vm_flags & VM_SPARC_ADI)
		return pte_mkmcd(pte);
	else
		return pte_mknotmcd(pte);
#else
	return pte;
#endif
}

static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
{
	unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V;
	unsigned int shift;

	switch (tte_szbits) {
	case _PAGE_SZ16GB_4V:
		shift = HPAGE_16GB_SHIFT;
		break;
	case _PAGE_SZ2GB_4V:
		shift = HPAGE_2GB_SHIFT;
		break;
	case _PAGE_SZ256MB_4V:
		shift = HPAGE_256MB_SHIFT;
		break;
	case _PAGE_SZ4MB_4V:
		shift = REAL_HPAGE_SHIFT;
		break;
	case _PAGE_SZ64K_4V:
		shift = HPAGE_64K_SHIFT;
		break;
	default:
		shift = PAGE_SHIFT;
		break;
	}
	return shift;
}

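/* sun4u hardware has no 2GB or 16GB hugepage sizes. */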
static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
{
	unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U;
	unsigned int shift;

	switch (tte_szbits) {
	case _PAGE_SZ256MB_4U:
		shift = HPAGE_256MB_SHIFT;
		break;
	case _PAGE_SZ4MB_4U:
		shift = REAL_HPAGE_SHIFT;
		break;
	case _PAGE_SZ64K_4U:
		shift = HPAGE_64K_SHIFT;
		break;
	default:
		shift = PAGE_SHIFT;
		break;
	}
	return shift;
}

static unsigned int huge_tte_to_shift(pte_t entry)
{
	unsigned int shift;

	if (tlb_type == hypervisor)
		shift = sun4v_huge_tte_to_shift(entry);
	else
		shift = sun4u_huge_tte_to_shift(entry);

	if (shift == PAGE_SHIFT)
		WARN_ONCE(1, "tte_to_shift: invalid hugepage tte=0x%lx\n",
			  pte_val(entry));

	return shift;
}

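/*
 * An HPAGE_SIZE (8MB) hugepage is mapped with two REAL_HPAGE_SIZE
 * (4MB) TTEs, so a TTE that decodes to the real size stands for an
 * HPAGE_SIZE region.
 */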
static unsigned long huge_tte_to_size(pte_t pte)
{
	unsigned long size = 1UL << huge_tte_to_shift(pte);

	if (size == REAL_HPAGE_SIZE)
		size = HPAGE_SIZE;
	return size;
}

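/*
 * Depending on the hugepage size, the returned "pte" may actually live
 * at the PUD level (16GB), at the PMD level (2GB/256MB/8MB), or in a
 * normal PTE page (64K).
 */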
pte_t *huge_pte_alloc(struct mm_struct *mm,
			unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_offset(pgd, addr);
	pud = pud_alloc(mm, p4d, addr);
	if (!pud)
		return NULL;
	if (sz >= PUD_SIZE)
		return (pte_t *)pud;
	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return NULL;
	if (sz >= PMD_SIZE)
		return (pte_t *)pmd;
	return pte_alloc_map(mm, pmd, addr);
}

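/* Like huge_pte_alloc() but never allocates; returns NULL on any hole. */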
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd))
		return NULL;
	p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d))
		return NULL;
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud))
		return NULL;
	if (is_hugetlb_pud(*pud))
		return (pte_t *)pud;
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return NULL;
	if (is_hugetlb_pmd(*pmd))
		return (pte_t *)pmd;
	return pte_offset_map(pmd, addr);
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	unsigned int nptes, orig_shift, shift;
	unsigned long i, size;
	pte_t orig;

	size = huge_tte_to_size(entry);

	if (size >= PUD_SIZE)
		shift = PUD_SHIFT;
	else if (size >= PMD_SIZE)
		shift = PMD_SHIFT;
	else
		shift = PAGE_SHIFT;

	nptes = size >> shift;

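	/* hugetlb_pte_count feeds the huge TSB sizing heuristics. */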
	if (!pte_present(*ptep) && pte_present(entry))
		mm->context.hugetlb_pte_count += nptes;

	addr &= ~(size - 1);
	orig = *ptep;
	orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig);

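	/*
	 * Replicate the TTE over all nptes entries backing this huge
	 * page, stepping the physical address by 1UL << shift each time.
	 */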
	for (i = 0; i < nptes; i++)
		ptep[i] = __pte(pte_val(entry) + (i << shift));

	maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift);
	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
	if (size == HPAGE_SIZE)
		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
				    orig_shift);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	unsigned int i, nptes, orig_shift, shift;
	unsigned long size;
	pte_t entry;

	entry = *ptep;
	size = huge_tte_to_size(entry);

	if (size >= PUD_SIZE)
		shift = PUD_SHIFT;
	else if (size >= PMD_SIZE)
		shift = PMD_SHIFT;
	else
		shift = PAGE_SHIFT;

	nptes = size >> shift;
	orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);

	if (pte_present(entry))
		mm->context.hugetlb_pte_count -= nptes;

	addr &= ~(size - 1);
	for (i = 0; i < nptes; i++)
		ptep[i] = __pte(0UL);

	maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
	if (size == HPAGE_SIZE)
		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
				    orig_shift);

	return entry;
}

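/*
 * A non-present huge entry (e.g. one under migration) does not carry
 * _PAGE_VALID, so treat any non-none entry that is not a plain
 * _PAGE_VALID translation as huge.
 */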
int pmd_huge(pmd_t pmd)
{
	return !pmd_none(pmd) &&
		(pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID;
}

int pud_huge(pud_t pud)
{
	return !pud_none(pud) &&
		(pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
}

static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
				   unsigned long addr)
{
	pgtable_t token = pmd_pgtable(*pmd);

	pmd_clear(pmd);
	pte_free_tlb(tlb, token, addr);
	mm_dec_nr_ptes(tlb->mm);
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd))
			continue;
		if (is_hugetlb_pmd(*pmd))
			pmd_clear(pmd);
		else
			hugetlb_free_pte_range(tlb, pmd, addr);
	} while (pmd++, addr = next, addr != end);

	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd, start);
	mm_dec_nr_pmds(tlb->mm);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	pud = pud_offset(p4d, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		if (is_hugetlb_pud(*pud))
			pud_clear(pud);
		else
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling);
	} while (pud++, addr = next, addr != end);

	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(p4d, start);
	p4d_clear(p4d);
	pud_free_tlb(tlb, pud, start);
	mm_dec_nr_puds(tlb->mm);
}

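/*
 * Tear down the page tables in [addr, end), freeing a page-table page
 * only when the range it covers lies entirely inside [floor, ceiling).
 */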
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	p4d_t *p4d;
	unsigned long next;

	addr &= PMD_MASK;
	if (addr < floor) {
		addr += PMD_SIZE;
		if (!addr)
			return;
	}
	if (ceiling) {
		ceiling &= PMD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		end -= PMD_SIZE;
	if (addr > end - 1)
		return;

	pgd = pgd_offset(tlb->mm, addr);
	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d))
			continue;
		hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
	} while (p4d++, addr = next, addr != end);
}