LoongArch: mm: Avoid unnecessary page fault retires on shared memory types
[platform/kernel/linux-starfive.git] / arch / loongarch / mm / fault.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
4  *
5  * Derived from MIPS:
6  * Copyright (C) 1995 - 2000 by Ralf Baechle
7  */
8 #include <linux/context_tracking.h>
9 #include <linux/signal.h>
10 #include <linux/sched.h>
11 #include <linux/interrupt.h>
12 #include <linux/kernel.h>
13 #include <linux/entry-common.h>
14 #include <linux/errno.h>
15 #include <linux/string.h>
16 #include <linux/types.h>
17 #include <linux/ptrace.h>
18 #include <linux/ratelimit.h>
19 #include <linux/mman.h>
20 #include <linux/mm.h>
21 #include <linux/smp.h>
22 #include <linux/kdebug.h>
23 #include <linux/kprobes.h>
24 #include <linux/perf_event.h>
25 #include <linux/uaccess.h>
26
27 #include <asm/branch.h>
28 #include <asm/mmu_context.h>
29 #include <asm/ptrace.h>
30
/*
 * When non-zero, do_sigsegv() may print a rate-limited diagnostic for a
 * SIGSEGV for which unhandled_signal() reports true.
 */
int show_unhandled_signals = 1;
32
33 static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
34 {
35         const int field = sizeof(unsigned long) * 2;
36
37         /* Are we prepared to handle this kernel fault?  */
38         if (fixup_exception(regs))
39                 return;
40
41         /*
42          * Oops. The kernel tried to access some bad page. We'll have to
43          * terminate things with extreme prejudice.
44          */
45         bust_spinlocks(1);
46
47         pr_alert("CPU %d Unable to handle kernel paging request at "
48                "virtual address %0*lx, era == %0*lx, ra == %0*lx\n",
49                raw_smp_processor_id(), field, address, field, regs->csr_era,
50                field,  regs->regs[1]);
51         die("Oops", regs);
52 }
53
54 static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
55 {
56         /*
57          * We ran out of memory, call the OOM killer, and return the userspace
58          * (which will retry the fault, or kill us if we got oom-killed).
59          */
60         if (!user_mode(regs)) {
61                 no_context(regs, address);
62                 return;
63         }
64         pagefault_out_of_memory();
65 }
66
67 static void __kprobes do_sigbus(struct pt_regs *regs,
68                 unsigned long write, unsigned long address, int si_code)
69 {
70         /* Kernel mode? Handle exceptions or die */
71         if (!user_mode(regs)) {
72                 no_context(regs, address);
73                 return;
74         }
75
76         /*
77          * Send a sigbus, regardless of whether we were in kernel
78          * or user mode.
79          */
80         current->thread.csr_badvaddr = address;
81         current->thread.trap_nr = read_csr_excode();
82         force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
83 }
84
85 static void __kprobes do_sigsegv(struct pt_regs *regs,
86                 unsigned long write, unsigned long address, int si_code)
87 {
88         const int field = sizeof(unsigned long) * 2;
89         static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
90
91         /* Kernel mode? Handle exceptions or die */
92         if (!user_mode(regs)) {
93                 no_context(regs, address);
94                 return;
95         }
96
97         /* User mode accesses just cause a SIGSEGV */
98         current->thread.csr_badvaddr = address;
99         if (!write)
100                 current->thread.error_code = 1;
101         else
102                 current->thread.error_code = 2;
103         current->thread.trap_nr = read_csr_excode();
104
105         if (show_unhandled_signals &&
106             unhandled_signal(current, SIGSEGV) && __ratelimit(&ratelimit_state)) {
107                 pr_info("do_page_fault(): sending SIGSEGV to %s for invalid %s %0*lx\n",
108                         current->comm,
109                         write ? "write access to" : "read access from",
110                         field, address);
111                 pr_info("era = %0*lx in", field,
112                         (unsigned long) regs->csr_era);
113                 print_vma_addr(KERN_CONT " ", regs->csr_era);
114                 pr_cont("\n");
115                 pr_info("ra  = %0*lx in", field,
116                         (unsigned long) regs->regs[1]);
117                 print_vma_addr(KERN_CONT " ", regs->regs[1]);
118                 pr_cont("\n");
119         }
120         force_sig_fault(SIGSEGV, si_code, (void __user *)address);
121 }
122
123 /*
124  * This routine handles page faults.  It determines the address,
125  * and the problem, and then passes it off to one of the appropriate
126  * routines.
127  */
128 static void __kprobes __do_page_fault(struct pt_regs *regs,
129                         unsigned long write, unsigned long address)
130 {
131         int si_code = SEGV_MAPERR;
132         unsigned int flags = FAULT_FLAG_DEFAULT;
133         struct task_struct *tsk = current;
134         struct mm_struct *mm = tsk->mm;
135         struct vm_area_struct *vma = NULL;
136         vm_fault_t fault;
137
138         /*
139          * We fault-in kernel-space virtual memory on-demand. The
140          * 'reference' page table is init_mm.pgd.
141          *
142          * NOTE! We MUST NOT take any locks for this case. We may
143          * be in an interrupt or a critical region, and should
144          * only copy the information from the master page table,
145          * nothing more.
146          */
147         if (address & __UA_LIMIT) {
148                 if (!user_mode(regs))
149                         no_context(regs, address);
150                 else
151                         do_sigsegv(regs, write, address, si_code);
152                 return;
153         }
154
155         /*
156          * If we're in an interrupt or have no user
157          * context, we must not take the fault..
158          */
159         if (faulthandler_disabled() || !mm) {
160                 do_sigsegv(regs, write, address, si_code);
161                 return;
162         }
163
164         if (user_mode(regs))
165                 flags |= FAULT_FLAG_USER;
166
167         perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
168 retry:
169         mmap_read_lock(mm);
170         vma = find_vma(mm, address);
171         if (!vma)
172                 goto bad_area;
173         if (vma->vm_start <= address)
174                 goto good_area;
175         if (!(vma->vm_flags & VM_GROWSDOWN))
176                 goto bad_area;
177         if (!expand_stack(vma, address))
178                 goto good_area;
179 /*
180  * Something tried to access memory that isn't in our memory map..
181  * Fix it, but check if it's kernel or user first..
182  */
183 bad_area:
184         mmap_read_unlock(mm);
185         do_sigsegv(regs, write, address, si_code);
186         return;
187
188 /*
189  * Ok, we have a good vm_area for this memory access, so
190  * we can handle it..
191  */
192 good_area:
193         si_code = SEGV_ACCERR;
194
195         if (write) {
196                 flags |= FAULT_FLAG_WRITE;
197                 if (!(vma->vm_flags & VM_WRITE))
198                         goto bad_area;
199         } else {
200                 if (!(vma->vm_flags & VM_READ) && address != exception_era(regs))
201                         goto bad_area;
202                 if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs))
203                         goto bad_area;
204         }
205
206         /*
207          * If for any reason at all we couldn't handle the fault,
208          * make sure we exit gracefully rather than endlessly redo
209          * the fault.
210          */
211         fault = handle_mm_fault(vma, address, flags, regs);
212
213         if (fault_signal_pending(fault, regs)) {
214                 if (!user_mode(regs))
215                         no_context(regs, address);
216                 return;
217         }
218
219         /* The fault is fully completed (including releasing mmap lock) */
220         if (fault & VM_FAULT_COMPLETED)
221                 return;
222
223         if (unlikely(fault & VM_FAULT_RETRY)) {
224                 flags |= FAULT_FLAG_TRIED;
225
226                 /*
227                  * No need to mmap_read_unlock(mm) as we would
228                  * have already released it in __lock_page_or_retry
229                  * in mm/filemap.c.
230                  */
231                 goto retry;
232         }
233         if (unlikely(fault & VM_FAULT_ERROR)) {
234                 mmap_read_unlock(mm);
235                 if (fault & VM_FAULT_OOM) {
236                         do_out_of_memory(regs, address);
237                         return;
238                 } else if (fault & VM_FAULT_SIGSEGV) {
239                         do_sigsegv(regs, write, address, si_code);
240                         return;
241                 } else if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
242                         do_sigbus(regs, write, address, si_code);
243                         return;
244                 }
245                 BUG();
246         }
247
248         mmap_read_unlock(mm);
249 }
250
251 asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
252                         unsigned long write, unsigned long address)
253 {
254         irqentry_state_t state = irqentry_enter(regs);
255
256         /* Enable interrupt if enabled in parent context */
257         if (likely(regs->csr_prmd & CSR_PRMD_PIE))
258                 local_irq_enable();
259
260         __do_page_fault(regs, write, address);
261
262         local_irq_disable();
263
264         irqentry_exit(regs, state);
265 }