bpf,x64: avoid unnecessary instructions when shift dest is ecx
author: Jie Meng <jmeng@fb.com>
Fri, 7 Oct 2022 20:23:47 +0000 (13:23 -0700)
committer: Alexei Starovoitov <ast@kernel.org>
Wed, 19 Oct 2022 23:53:51 +0000 (16:53 -0700)
x64 JIT produces redundant instructions when a shift operation's
destination register is BPF_REG_4/ecx and this patch removes them.

Specifically, when dest reg is BPF_REG_4 but the src isn't, we
needn't push and pop ecx around shift only to get it overwritten
by r11 immediately afterwards.

In the rare case when both dest and src registers are BPF_REG_4,
a single shift instruction is sufficient and we don't need the
two MOV instructions around the shift.

To summarize using shift left as an example, without patch:
-------------------------------------------------
            |   dst == ecx     |    dst != ecx
=================================================
src == ecx  |   mov r11, ecx   |    shl dst, cl
            |   shl r11, cl    |
            |   mov ecx, r11   |
-------------------------------------------------
src != ecx  |   mov r11, ecx   |    push ecx
            |   push ecx       |    mov ecx, src
            |   mov ecx, src   |    shl dst, cl
            |   shl r11, cl    |    pop ecx
            |   pop ecx        |
            |   mov ecx, r11   |
-------------------------------------------------

With patch:
-------------------------------------------------
            |   dst == ecx     |    dst != ecx
=================================================
src == ecx  |   shl ecx, cl    |    shl dst, cl
-------------------------------------------------
src != ecx  |   mov r11, ecx   |    push ecx
            |   mov ecx, src   |    mov ecx, src
            |   shl r11, cl    |    shl dst, cl
            |   mov ecx, r11   |    pop ecx
-------------------------------------------------

Signed-off-by: Jie Meng <jmeng@fb.com>
Link: https://lore.kernel.org/r/20221007202348.1118830-2-jmeng@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
arch/x86/net/bpf_jit_comp.c

index 0abd082..d926ca6 100644 (file)
@@ -1138,16 +1138,15 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
                case BPF_ALU64 | BPF_RSH | BPF_X:
                case BPF_ALU64 | BPF_ARSH | BPF_X:
 
-                       /* Check for bad case when dst_reg == rcx */
-                       if (dst_reg == BPF_REG_4) {
-                               /* mov r11, dst_reg */
-                               EMIT_mov(AUX_REG, dst_reg);
-                               dst_reg = AUX_REG;
-                       }
-
                        if (src_reg != BPF_REG_4) { /* common case */
-                               EMIT1(0x51); /* push rcx */
-
+                               /* Check for bad case when dst_reg == rcx */
+                               if (dst_reg == BPF_REG_4) {
+                                       /* mov r11, dst_reg */
+                                       EMIT_mov(AUX_REG, dst_reg);
+                                       dst_reg = AUX_REG;
+                               } else {
+                                       EMIT1(0x51); /* push rcx */
+                               }
                                /* mov rcx, src_reg */
                                EMIT_mov(BPF_REG_4, src_reg);
                        }
@@ -1159,12 +1158,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
                        b3 = simple_alu_opcodes[BPF_OP(insn->code)];
                        EMIT2(0xD3, add_1reg(b3, dst_reg));
 
-                       if (src_reg != BPF_REG_4)
-                               EMIT1(0x59); /* pop rcx */
+                       if (src_reg != BPF_REG_4) {
+                               if (insn->dst_reg == BPF_REG_4)
+                                       /* mov dst_reg, r11 */
+                                       EMIT_mov(insn->dst_reg, AUX_REG);
+                               else
+                                       EMIT1(0x59); /* pop rcx */
+                       }
 
-                       if (insn->dst_reg == BPF_REG_4)
-                               /* mov dst_reg, r11 */
-                               EMIT_mov(insn->dst_reg, AUX_REG);
                        break;
 
                case BPF_ALU | BPF_END | BPF_FROM_BE: