* routines.
*/
static noinline void
-__do_page_fault(struct pt_regs *regs, unsigned long error_code,
+__do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
unsigned long address)
{
+ unsigned long sw_error_code;
struct vm_area_struct *vma;
struct task_struct *tsk;
struct mm_struct *mm;
* nothing more.
*
* This verifies that the fault happens in kernel space
- * (error_code & 4) == 0, and that the fault was not a
- * protection error (error_code & 9) == 0.
+ * (hw_error_code & 4) == 0, and that the fault was not a
+ * protection error (hw_error_code & 9) == 0.
*/
if (unlikely(fault_in_kernel_space(address))) {
- if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
+ if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
if (vmalloc_fault(address) >= 0)
return;
}
/* Can handle a stale RO->RW TLB: */
- if (spurious_fault(error_code, address))
+ if (spurious_fault(hw_error_code, address))
return;
/* kprobes don't want to hook the spurious faults: */
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock:
*/
- bad_area_nosemaphore(regs, error_code, address, NULL);
+ bad_area_nosemaphore(regs, hw_error_code, address, NULL);
return;
}
if (unlikely(kprobes_fault(regs)))
return;
- if (unlikely(error_code & X86_PF_RSVD))
- pgtable_bad(regs, error_code, address);
+ if (unlikely(hw_error_code & X86_PF_RSVD))
+ pgtable_bad(regs, hw_error_code, address);
- if (unlikely(smap_violation(error_code, regs))) {
- bad_area_nosemaphore(regs, error_code, address, NULL);
+ if (unlikely(smap_violation(hw_error_code, regs))) {
+ bad_area_nosemaphore(regs, hw_error_code, address, NULL);
return;
}
* in a region with pagefaults disabled then we must not take the fault
*/
if (unlikely(faulthandler_disabled() || !mm)) {
- bad_area_nosemaphore(regs, error_code, address, NULL);
+ bad_area_nosemaphore(regs, hw_error_code, address, NULL);
return;
}
/*
+ * hw_error_code is literally the "page fault error code" passed to
+ * the kernel directly from the hardware. But, we will shortly be
+ * modifying it in software, so give it a new name.
+ */
+ sw_error_code = hw_error_code;
+
+ /*
* It's safe to allow irq's after cr2 has been saved and the
* vmalloc fault has been handled.
*
*/
if (user_mode(regs)) {
local_irq_enable();
- error_code |= X86_PF_USER;
+ /*
+ * Up to this point, X86_PF_USER set in hw_error_code
+ * indicated a user-mode access. But, after this,
+ * X86_PF_USER in sw_error_code will indicate either
+ * that, *or* an implicit kernel(supervisor)-mode access
+ * which originated from user mode.
+ */
+ if (!(hw_error_code & X86_PF_USER)) {
+ /*
+ * The CPU was in user mode, but the CPU says
+ * the fault was not a user-mode access.
+ * Must be an implicit kernel-mode access,
+ * which we do not expect to happen in the
+ * user address space.
+ */
+ pr_warn_once("kernel-mode error from user-mode: %lx\n",
+ hw_error_code);
+
+ sw_error_code |= X86_PF_USER;
+ }
flags |= FAULT_FLAG_USER;
} else {
if (regs->flags & X86_EFLAGS_IF)
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
- if (error_code & X86_PF_WRITE)
+ if (sw_error_code & X86_PF_WRITE)
flags |= FAULT_FLAG_WRITE;
- if (error_code & X86_PF_INSTR)
+ if (sw_error_code & X86_PF_INSTR)
flags |= FAULT_FLAG_INSTRUCTION;
/*
* space check, thus avoiding the deadlock:
*/
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
- if (!(error_code & X86_PF_USER) &&
+ if (!(sw_error_code & X86_PF_USER) &&
!search_exception_tables(regs->ip)) {
- bad_area_nosemaphore(regs, error_code, address, NULL);
+ bad_area_nosemaphore(regs, sw_error_code, address, NULL);
return;
}
retry:
vma = find_vma(mm, address);
if (unlikely(!vma)) {
- bad_area(regs, error_code, address);
+ bad_area(regs, sw_error_code, address);
return;
}
if (likely(vma->vm_start <= address))
goto good_area;
if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
- bad_area(regs, error_code, address);
+ bad_area(regs, sw_error_code, address);
return;
}
- if (error_code & X86_PF_USER) {
+ if (sw_error_code & X86_PF_USER) {
/*
* Accessing the stack below %sp is always a bug.
* The large cushion allows instructions like enter
* 32 pointers and then decrements %sp by 65535.)
*/
if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
- bad_area(regs, error_code, address);
+ bad_area(regs, sw_error_code, address);
return;
}
}
if (unlikely(expand_stack(vma, address))) {
- bad_area(regs, error_code, address);
+ bad_area(regs, sw_error_code, address);
return;
}
* we can handle it..
*/
good_area:
- if (unlikely(access_error(error_code, vma))) {
- bad_area_access_error(regs, error_code, address, vma);
+ if (unlikely(access_error(sw_error_code, vma))) {
+ bad_area_access_error(regs, sw_error_code, address, vma);
return;
}
return;
/* Not returning to user mode? Handle exceptions or die: */
- no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
+ no_context(regs, sw_error_code, address, SIGBUS, BUS_ADRERR);
return;
}
up_read(&mm->mmap_sem);
if (unlikely(fault & VM_FAULT_ERROR)) {
- mm_fault_error(regs, error_code, address, &pkey, fault);
+ mm_fault_error(regs, sw_error_code, address, &pkey, fault);
return;
}