bpf, x64: Save bytes for DIV by reducing reg copies
author    Jie Meng <jmeng@fb.com>
          Sat, 2 Oct 2021 03:56:26 +0000 (20:56 -0700)
committer Daniel Borkmann <daniel@iogearbox.net>
          Wed, 6 Oct 2021 13:24:36 +0000 (15:24 +0200)

Instead of unconditionally pushing/popping %rax and %rdx around a
division/modulo, we can save a few bytes when the destination register
is either BPF r0 (%rax) or r3 (%rdx), since the result is written there
anyway.

Also, we do not need to copy the source to %r11 unless the source is either
%rax, %rdx or an immediate.

For example, before the patch:

  22:   push   %rax
  23:   push   %rdx
  24:   mov    %rsi,%r11
  27:   xor    %edx,%edx
  29:   div    %r11
  2c:   mov    %rax,%r11
  2f:   pop    %rdx
  30:   pop    %rax
  31:   mov    %r11,%rax

After:

  22:   push   %rdx
  23:   xor    %edx,%edx
  25:   div    %rsi
  28:   pop    %rdx
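
For comparison, when the destination register is neither %rax nor %rdx,
both registers still have to be preserved. A 64-bit BPF_DIV with dst in
r1 (%rdi) and src in r2 (%rsi) would now emit roughly the following
(hand-written sketch, offsets omitted):

  push   %rax
  push   %rdx
  mov    %rdi,%rax
  xor    %edx,%edx
  div    %rsi
  mov    %rax,%rdi
  pop    %rdx
  pop    %rax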

Signed-off-by: Jie Meng <jmeng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20211002035626.2041910-1-jmeng@fb.com
arch/x86/net/bpf_jit_comp.c
tools/testing/selftests/bpf/verifier/jit.c

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 576ef1a..5a0edea 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1028,19 +1028,30 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                case BPF_ALU64 | BPF_MOD | BPF_X:
                case BPF_ALU64 | BPF_DIV | BPF_X:
                case BPF_ALU64 | BPF_MOD | BPF_K:
-               case BPF_ALU64 | BPF_DIV | BPF_K:
-                       EMIT1(0x50); /* push rax */
-                       EMIT1(0x52); /* push rdx */
-
-                       if (BPF_SRC(insn->code) == BPF_X)
-                               /* mov r11, src_reg */
-                               EMIT_mov(AUX_REG, src_reg);
-                       else
+               case BPF_ALU64 | BPF_DIV | BPF_K: {
+                       bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
+                       if (dst_reg != BPF_REG_0)
+                               EMIT1(0x50); /* push rax */
+                       if (dst_reg != BPF_REG_3)
+                               EMIT1(0x52); /* push rdx */
+
+                       if (BPF_SRC(insn->code) == BPF_X) {
+                               if (src_reg == BPF_REG_0 ||
+                                   src_reg == BPF_REG_3) {
+                                       /* mov r11, src_reg */
+                                       EMIT_mov(AUX_REG, src_reg);
+                                       src_reg = AUX_REG;
+                               }
+                       } else {
                                /* mov r11, imm32 */
                                EMIT3_off32(0x49, 0xC7, 0xC3, imm32);
+                               src_reg = AUX_REG;
+                       }
 
-                       /* mov rax, dst_reg */
-                       EMIT_mov(BPF_REG_0, dst_reg);
+                       if (dst_reg != BPF_REG_0)
+                               /* mov rax, dst_reg */
+                               emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg);
 
                        /*
                         * xor edx, edx
@@ -1048,26 +1059,28 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                         */
                        EMIT2(0x31, 0xd2);
 
-                       if (BPF_CLASS(insn->code) == BPF_ALU64)
-                               /* div r11 */
-                               EMIT3(0x49, 0xF7, 0xF3);
-                       else
-                               /* div r11d */
-                               EMIT3(0x41, 0xF7, 0xF3);
-
-                       if (BPF_OP(insn->code) == BPF_MOD)
-                               /* mov r11, rdx */
-                               EMIT3(0x49, 0x89, 0xD3);
-                       else
-                               /* mov r11, rax */
-                               EMIT3(0x49, 0x89, 0xC3);
-
-                       EMIT1(0x5A); /* pop rdx */
-                       EMIT1(0x58); /* pop rax */
-
-                       /* mov dst_reg, r11 */
-                       EMIT_mov(dst_reg, AUX_REG);
+                       if (is64)
+                               EMIT1(add_1mod(0x48, src_reg));
+                       else if (is_ereg(src_reg))
+                               EMIT1(add_1mod(0x40, src_reg));
+                       /* div src_reg */
+                       EMIT2(0xF7, add_1reg(0xF0, src_reg));
+
+                       if (BPF_OP(insn->code) == BPF_MOD &&
+                           dst_reg != BPF_REG_3)
+                               /* mov dst_reg, rdx */
+                               emit_mov_reg(&prog, is64, dst_reg, BPF_REG_3);
+                       else if (BPF_OP(insn->code) == BPF_DIV &&
+                                dst_reg != BPF_REG_0)
+                               /* mov dst_reg, rax */
+                               emit_mov_reg(&prog, is64, dst_reg, BPF_REG_0);
+
+                       if (dst_reg != BPF_REG_3)
+                               EMIT1(0x5A); /* pop rdx */
+                       if (dst_reg != BPF_REG_0)
+                               EMIT1(0x58); /* pop rax */
                        break;
+               }
 
                case BPF_ALU | BPF_MUL | BPF_K:
                case BPF_ALU64 | BPF_MUL | BPF_K:
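
The immediate-divisor case still goes through %r11, since x86 div has no
immediate form. For example, a 64-bit BPF_DIV by a constant with dst in
r0 (%rax) would now emit roughly (hand-written sketch, offsets omitted,
imm32 standing in for the 32-bit immediate):

  push   %rdx
  mov    $imm32,%r11
  xor    %edx,%edx
  div    %r11
  pop    %rdx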
diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c
index eedcb75..79021c3 100644
--- a/tools/testing/selftests/bpf/verifier/jit.c
+++ b/tools/testing/selftests/bpf/verifier/jit.c
        .retval = 2,
 },
 {
+       "jit: various div tests",
+       .insns = {
+       BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL),
+       BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL),
+       BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+       BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+       BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       BPF_LD_IMM64(BPF_REG_3, 0xeeff0d413122ULL),
+       BPF_ALU64_IMM(BPF_DIV, BPF_REG_3, 0xfefeeeULL),
+       BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL),
+       BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL),
+       BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+       BPF_LD_IMM64(BPF_REG_3, 0xbeefULL),
+       BPF_ALU64_REG(BPF_MOD, BPF_REG_1, BPF_REG_3),
+       BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL),
+       BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+       BPF_LD_IMM64(BPF_REG_3, 0x2bULL),
+       BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3),
+       BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_1),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_2),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_IMM(BPF_REG_0, 2),
+       BPF_EXIT_INSN(),
+       },
+       .result = ACCEPT,
+       .retval = 2,
+},
+{
        "jit: jsgt, jslt",
        .insns = {
        BPF_LD_IMM64(BPF_REG_1, 0x80000000ULL),