x86/nospec: Shorten RESET_CALL_DEPTH
author Peter Zijlstra <peterz@infradead.org>
Fri, 10 Feb 2023 10:10:57 +0000 (10:10 +0000)
committer Borislav Petkov (AMD) <bp@alien8.de>
Wed, 31 May 2023 11:40:57 +0000 (13:40 +0200)
RESET_CALL_DEPTH is a pretty fat monster and blows up UNTRAIN_RET to
20 bytes:

  19:       48 c7 c0 80 00 00 00    mov    $0x80,%rax
  20:       48 c1 e0 38             shl    $0x38,%rax
  24:       65 48 89 04 25 00 00 00 00      mov    %rax,%gs:0x0     29: R_X86_64_32S        pcpu_hot+0x10

Shrink it by 4 bytes:

  0:   31 c0                         xor    %eax,%eax
  2:   48 0f ba e8 3f                bts    $0x3f,%rax
  7:   65 48 89 04 25 00 00 00 00    mov    %rax,%gs:0x0

Shrink RESET_CALL_DEPTH_FROM_CALL by 5 bytes by only setting %al; the
other bits are shifted out (the same could be done for RESET_CALL_DEPTH,
but the XOR+BTS sequence has fewer dependencies due to the zeroing).

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230515093020.729622326@infradead.org
arch/x86/include/asm/nospec-branch.h

index edb2b0c..55388c9 100644 (file)
        movq    $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
 
 #define RESET_CALL_DEPTH                                       \
        movq    $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
 
 #define RESET_CALL_DEPTH                                       \
-       mov     $0x80, %rax;                                    \
-       shl     $56, %rax;                                      \
+       xor     %eax, %eax;                                     \
+       bts     $63, %rax;                                      \
        movq    %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
 
 #define RESET_CALL_DEPTH_FROM_CALL                             \
        movq    %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
 
 #define RESET_CALL_DEPTH_FROM_CALL                             \
-       mov     $0xfc, %rax;                                    \
+       movb    $0xfc, %al;                                     \
        shl     $56, %rax;                                      \
        movq    %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);   \
        CALL_THUNKS_DEBUG_INC_CALLS
        shl     $56, %rax;                                      \
        movq    %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);   \
        CALL_THUNKS_DEBUG_INC_CALLS