From 508074607c7b95b24f0adf633fdf606761bb7824 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 20 Apr 2022 09:41:31 +0100 Subject: [PATCH] ARM: 9195/1: entry: avoid explicit literal loads ARMv7 has MOVW/MOVT instruction pairs to load symbol addresses into registers without having to rely on literal loads that go via the D-cache. For older cores, we now support a similar arrangement, based on PC-relative group relocations. This means we can elide most literal loads entirely from the entry path, by switching to the ldr_va macro to emit the appropriate sequence depending on the target architecture revision. While at it, switch to the bl_r macro for invoking the right PABT/DABT helpers instead of setting the LR register explicitly, which does not play well with cores that speculate across function returns. Signed-off-by: Ard Biesheuvel Reviewed-by: Linus Walleij Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/assembler.h | 18 +++++++++--------- arch/arm/kernel/entry-armv.S | 37 +++++++------------------------------ arch/arm/kernel/entry-common.S | 10 +--------- arch/arm/kernel/entry-header.S | 3 +-- 4 files changed, 18 insertions(+), 50 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 8e59d74..90fbe4a 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -666,12 +666,11 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) __adldst_l str, \src, \sym, \tmp, \cond .endm - .macro __ldst_va, op, reg, tmp, sym, cond + .macro __ldst_va, op, reg, tmp, sym, cond, offset #if __LINUX_ARM_ARCH__ >= 7 || \ !defined(CONFIG_ARM_HAS_GROUP_RELOCS) || \ (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) mov_l \tmp, \sym, \cond - \op\cond \reg, [\tmp] #else /* * Avoid a literal load, by emitting a sequence of ADD/LDR instructions @@ -683,20 +682,21 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .reloc .L0_\@, R_ARM_ALU_PC_G0_NC, \sym .reloc .L1_\@, R_ARM_ALU_PC_G1_NC, \sym .reloc .L2_\@, R_ARM_LDR_PC_G2, \sym -.L0_\@: sub\cond \tmp, pc, #8 -.L1_\@: sub\cond \tmp, \tmp, #4 -.L2_\@: \op\cond \reg, [\tmp, #0] +.L0_\@: sub\cond \tmp, pc, #8 - \offset +.L1_\@: sub\cond \tmp, \tmp, #4 - \offset +.L2_\@: #endif + \op\cond \reg, [\tmp, #\offset] .endm /* * ldr_va - load a 32-bit word from the virtual address of \sym */ - .macro ldr_va, rd:req, sym:req, cond, tmp + .macro ldr_va, rd:req, sym:req, cond, tmp, offset=0 .ifnb \tmp - __ldst_va ldr, \rd, \tmp, \sym, \cond + __ldst_va ldr, \rd, \tmp, \sym, \cond, \offset .else - __ldst_va ldr, \rd, \rd, \sym, \cond + __ldst_va ldr, \rd, \rd, \sym, \cond, \offset .endif .endm @@ -704,7 +704,7 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) * str_va - store a 32-bit word to the virtual address of \sym */ .macro str_va, rn:req, sym:req, tmp:req, cond - __ldst_va str, \rn, \tmp, \sym, \cond + __ldst_va str, \rn, \tmp, \sym, \cond, 0 .endm /* diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 7a86824..6e7dfb4 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -61,9 +61,8 @@ .macro pabt_helper @ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5 #ifdef MULTI_PABORT - ldr ip, .LCprocfns - mov lr, pc - ldr pc, [ip, #PROCESSOR_PABT_FUNC] + ldr_va ip, processor, offset=PROCESSOR_PABT_FUNC + bl_r ip #else bl CPU_PABORT_HANDLER #endif @@ -82,9 +81,8 @@ @ the fault status register in r1. r9 must be preserved. @ #ifdef MULTI_DABORT - ldr ip, .LCprocfns - mov lr, pc - ldr pc, [ip, #PROCESSOR_DABT_FUNC] + ldr_va ip, processor, offset=PROCESSOR_DABT_FUNC + bl_r ip #else bl CPU_DABORT_HANDLER #endif @@ -302,16 +300,6 @@ __fiq_svc: UNWIND(.fnend ) ENDPROC(__fiq_svc) - .align 5 -.LCcralign: - .word cr_alignment -#ifdef MULTI_DABORT -.LCprocfns: - .word processor -#endif -.LCfp: - .word fp_enter - /* * Abort mode handlers */ @@ -370,7 +358,7 @@ ENDPROC(__fiq_abt) THUMB( stmia sp, {r0 - r12} ) ATRAP( mrc p15, 0, r7, c1, c0, 0) - ATRAP( ldr r8, .LCcralign) + ATRAP( ldr_va r8, cr_alignment) ldmia r0, {r3 - r5} add r0, sp, #S_PC @ here for interlock avoidance @@ -379,8 +367,6 @@ ENDPROC(__fiq_abt) str r3, [sp] @ save the "real" r0 copied @ from the exception stack - ATRAP( ldr r8, [r8, #0]) - @ @ We are now ready to fill in the remaining blanks on the stack: @ @@ -505,9 +491,7 @@ __und_usr_thumb: */ #if __LINUX_ARM_ARCH__ < 7 /* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */ -#define NEED_CPU_ARCHITECTURE - ldr r5, .LCcpu_architecture - ldr r5, [r5] + ldr_va r5, cpu_architecture cmp r5, #CPU_ARCH_ARMv7 blo __und_usr_fault_16 @ 16bit undefined instruction /* @@ -654,12 +638,6 @@ call_fpe: ret.w lr @ CP#14 (Debug) ret.w lr @ CP#15 (Control) -#ifdef NEED_CPU_ARCHITECTURE - .align 2 -.LCcpu_architecture: - .word __cpu_architecture -#endif - #ifdef CONFIG_NEON .align 6 @@ -685,9 +663,8 @@ call_fpe: #endif do_fpe: - ldr r4, .LCfp add r10, r10, #TI_FPSTATE @ r10 = workspace - ldr pc, [r4] @ Call FP module USR entry point + ldr_va pc, fp_enter, tmp=r4 @ Call FP module USR entry point /* * The FP module is called with these registers set: diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 90d40f4..ad3210e 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -198,7 +198,7 @@ ENTRY(vector_swi) #endif reload_current r10, ip zero_fp - alignment_trap r10, ip, __cr_alignment + alignment_trap r10, ip, cr_alignment asm_trace_hardirqs_on save=0 enable_irq_notrace ct_user_exit save=0 @@ -328,14 +328,6 @@ __sys_trace_return: bl syscall_trace_exit b ret_slow_syscall - .align 5 -#ifdef CONFIG_ALIGNMENT_TRAP - .type __cr_alignment, #object -__cr_alignment: - .word cr_alignment -#endif - .ltorg - .macro syscall_table_start, sym .equ __sys_nr, 0 .type \sym, #object diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 9a1dc14..5865621 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -48,8 +48,7 @@ .macro alignment_trap, rtmp1, rtmp2, label #ifdef CONFIG_ALIGNMENT_TRAP mrc p15, 0, \rtmp2, c1, c0, 0 - ldr \rtmp1, \label - ldr \rtmp1, [\rtmp1] + ldr_va \rtmp1, \label teq \rtmp1, \rtmp2 mcrne p15, 0, \rtmp1, c1, c0, 0 #endif -- 2.7.4