riscv: Fix build with CONFIG_CC_OPTIMIZE_FOR_SIZE=y
author Samuel Holland <samuel@sholland.org>
Thu, 22 Sep 2022 06:09:58 +0000 (01:09 -0500)
committer Palmer Dabbelt <palmer@rivosinc.com>
Wed, 1 Feb 2023 05:55:27 +0000 (21:55 -0800)
commit 8eb060e10185 ("arch/riscv: add Zihintpause support") broke
building with CONFIG_CC_OPTIMIZE_FOR_SIZE enabled (gcc 11.1.0):

  CC      arch/riscv/kernel/vdso/vgettimeofday.o
In file included from <command-line>:
./arch/riscv/include/asm/jump_label.h: In function 'cpu_relax':
././include/linux/compiler_types.h:285:33: warning: 'asm' operand 0 probably does not match constraints
  285 | #define asm_volatile_goto(x...) asm goto(x)
      |                                 ^~~
./arch/riscv/include/asm/jump_label.h:41:9: note: in expansion of macro 'asm_volatile_goto'
   41 |         asm_volatile_goto(
      |         ^~~~~~~~~~~~~~~~~
././include/linux/compiler_types.h:285:33: error: impossible constraint in 'asm'
  285 | #define asm_volatile_goto(x...) asm goto(x)
      |                                 ^~~
./arch/riscv/include/asm/jump_label.h:41:9: note: in expansion of macro 'asm_volatile_goto'
   41 |         asm_volatile_goto(
      |         ^~~~~~~~~~~~~~~~~
make[1]: *** [scripts/Makefile.build:249: arch/riscv/kernel/vdso/vgettimeofday.o] Error 1
make: *** [arch/riscv/Makefile:128: vdso_prepare] Error 2

Having a static branch in cpu_relax() is problematic because that
function is widely inlined, including in some quite complex functions
like in the VDSO. A quick measurement shows this static branch is
responsible by itself for around 40% of the jump table.

Drop the static branch, which ends up being the same number of
instructions anyway. If Zihintpause is supported, we trade the nop from
the static branch for a div. If Zihintpause is unsupported, we trade the
jump from the static branch for (what gets interpreted as) a nop.

Fixes: 8eb060e10185 ("arch/riscv: add Zihintpause support")
Signed-off-by: Samuel Holland <samuel@sholland.org>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Cc: stable@vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
arch/riscv/include/asm/hwcap.h
arch/riscv/include/asm/vdso/processor.h

index 3c8a5ca..3bf10a8 100644 (file)
@@ -67,7 +67,6 @@ enum riscv_isa_ext_id {
  */
 enum riscv_isa_ext_key {
        RISCV_ISA_EXT_KEY_FPU,          /* For 'F' and 'D' */
-       RISCV_ISA_EXT_KEY_ZIHINTPAUSE,
        RISCV_ISA_EXT_KEY_MAX,
 };
 
@@ -87,8 +86,6 @@ static __always_inline int riscv_isa_ext2key(int num)
                return RISCV_ISA_EXT_KEY_FPU;
        case RISCV_ISA_EXT_d:
                return RISCV_ISA_EXT_KEY_FPU;
-       case RISCV_ISA_EXT_ZIHINTPAUSE:
-               return RISCV_ISA_EXT_KEY_ZIHINTPAUSE;
        default:
                return -EINVAL;
        }
index 1e4f8b4..789bdb8 100644 (file)
@@ -4,30 +4,25 @@
 
 #ifndef __ASSEMBLY__
 
-#include <linux/jump_label.h>
 #include <asm/barrier.h>
-#include <asm/hwcap.h>
 
 static inline void cpu_relax(void)
 {
-       if (!static_branch_likely(&riscv_isa_ext_keys[RISCV_ISA_EXT_KEY_ZIHINTPAUSE])) {
 #ifdef __riscv_muldiv
-               int dummy;
-               /* In lieu of a halt instruction, induce a long-latency stall. */
-               __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
+       int dummy;
+       /* In lieu of a halt instruction, induce a long-latency stall. */
+       __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
 #endif
-       } else {
-               /*
-                * Reduce instruction retirement.
-                * This assumes the PC changes.
-                */
+       /*
+        * Reduce instruction retirement.
+        * This assumes the PC changes.
+        */
 #ifdef __riscv_zihintpause
-               __asm__ __volatile__ ("pause");
+       __asm__ __volatile__ ("pause");
 #else
-               /* Encoding of the pause instruction */
-               __asm__ __volatile__ (".4byte 0x100000F");
+       /* Encoding of the pause instruction */
+       __asm__ __volatile__ (".4byte 0x100000F");
 #endif
-       }
        barrier();
 }