From 4916523053d791e17d1cc2135f7a14de16ed1dcc Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Thu, 12 Jan 2023 18:24:43 -0800 Subject: [PATCH] [X86] Replace (31/63 -/^ X) with (NOT X) and ignore (32/64 ^ X) when computing shift count Shift count is masked by hardware so these peepholes just extend common patterns for NOT to the lower bits of shift count. As well (32/64 ^ X) is masked off by the shift so can be safely ignored. Reviewed By: pengfei, lebedev.ri Differential Revision: https://reviews.llvm.org/D140087 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 29 +++++++++-- llvm/test/CodeGen/X86/legalize-shift-64.ll | 2 +- llvm/test/CodeGen/X86/not-shift.ll | 82 ++++++++++++++---------------- 3 files changed, 63 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index e90f1b6..8c13ff8 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -3995,17 +3995,36 @@ bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) { // so we are not afraid that we might mess up BZHI/BEXTR pattern. SDValue NewShiftAmt; - if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { + if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB || + ShiftAmt->getOpcode() == ISD::XOR) { SDValue Add0 = ShiftAmt->getOperand(0); SDValue Add1 = ShiftAmt->getOperand(1); auto *Add0C = dyn_cast<ConstantSDNode>(Add0); auto *Add1C = dyn_cast<ConstantSDNode>(Add1); - // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X - // to avoid the ADD/SUB. + // If we are shifting by X+/-/^N where N == 0 mod Size, then just shift by X - // to avoid the ADD/SUB/XOR. if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) { NewShiftAmt = Add0; - // If we are shifting by N-X where N == 0 mod Size, then just shift by -X - // to generate a NEG instead of a SUB of a constant. 
+ + } else if (ShiftAmt->getOpcode() != ISD::ADD && + ((Add0C && Add0C->getAPIntValue().urem(Size) == Size - 1) || + (Add1C && Add1C->getAPIntValue().urem(Size) == Size - 1))) { + // If we are doing a NOT on just the lower bits with (Size*N-1) -/^ X + // we can replace it with a NOT. In the XOR case it may save some code + // size, in the SUB case it also may save a move. + assert(Add0C == nullptr || Add1C == nullptr); + + // We can only do N-X, not X-N + if (ShiftAmt->getOpcode() == ISD::SUB && Add0C == nullptr) + return false; + + auto *ConstValOp = Add0C == nullptr ? Add1C : Add0C; + EVT OpVT = ShiftAmt.getValueType(); + + NewShiftAmt = CurDAG->getNOT(DL, Add0C == nullptr ? Add0 : Add1, OpVT); + insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt); + // If we are shifting by N-X where N == 0 mod Size, then just shift by + // -X to generate a NEG instead of a SUB of a constant. } else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C && Add0C->getZExtValue() != 0) { EVT SubVT = ShiftAmt.getValueType(); diff --git a/llvm/test/CodeGen/X86/legalize-shift-64.ll b/llvm/test/CodeGen/X86/legalize-shift-64.ll index 57643e3..53208de 100644 --- a/llvm/test/CodeGen/X86/legalize-shift-64.ll +++ b/llvm/test/CodeGen/X86/legalize-shift-64.ll @@ -10,7 +10,7 @@ define i64 @test1(i32 %xx, i32 %test) nounwind { ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: shrl %edx -; CHECK-NEXT: xorb $31, %cl +; CHECK-NEXT: notb %cl ; CHECK-NEXT: shrl %cl, %edx ; CHECK-NEXT: retl %conv = zext i32 %xx to i64 diff --git a/llvm/test/CodeGen/X86/not-shift.ll b/llvm/test/CodeGen/X86/not-shift.ll index 67de3f2..1d2fd19 100644 --- a/llvm/test/CodeGen/X86/not-shift.ll +++ b/llvm/test/CodeGen/X86/not-shift.ll @@ -50,17 +50,17 @@ define i64 @sub63_shiftl64(i64 %val, i64 %cnt) nounwind { ; ; X64-NOBMI2-LABEL: sub63_shiftl64: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq %rdi, %rax -; X64-NOBMI2-NEXT: movb $63, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl 
+; X64-NOBMI2-NEXT: notb %cl +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: sub63_shiftl64: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $63, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: shlxq %rax, %rdi, %rax +; X64-BMI2-NEXT: notb %sil +; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax ; X64-BMI2-NEXT: retq %adjcnt = sub i64 63, %cnt %result = shl i64 %val, %adjcnt @@ -107,14 +107,14 @@ define i64 @xor63_shiftr64(i64 %val, i64 %cnt) nounwind { ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq %rdi, %rax -; X64-NOBMI2-NEXT: xorb $63, %cl +; X64-NOBMI2-NEXT: notb %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shrq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: xor63_shiftr64: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: xorb $63, %sil +; X64-BMI2-NEXT: notb %sil ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax ; X64-BMI2-NEXT: retq %adjcnt = xor i64 %cnt, 63 @@ -162,14 +162,14 @@ define i64 @sub127_shiftl64(i64 %val, i64 %cnt) nounwind { ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq %rdi, %rax -; X64-NOBMI2-NEXT: xorb $127, %cl +; X64-NOBMI2-NEXT: notb %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: sub127_shiftl64: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: xorb $127, %sil +; X64-BMI2-NEXT: notb %sil ; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax ; X64-BMI2-NEXT: retq %adjcnt = sub i64 127, %cnt @@ -217,14 +217,14 @@ define i64 @xor127_shiftr64(i64 %val, i64 %cnt) nounwind { ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq %rdi, %rax -; X64-NOBMI2-NEXT: xorb $127, %cl +; X64-NOBMI2-NEXT: notb %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shrq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: xor127_shiftr64: ; X64-BMI2: # %bb.0: -; 
X64-BMI2-NEXT: xorb $127, %sil +; X64-BMI2-NEXT: notb %sil ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax ; X64-BMI2-NEXT: retq %adjcnt = xor i64 %cnt, 127 @@ -272,14 +272,12 @@ define i64 @xor64_shiftl64(i64 %val, i64 %cnt) nounwind { ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq %rdi, %rax -; X64-NOBMI2-NEXT: xorb $64, %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: xor64_shiftl64: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: xorb $64, %sil ; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax ; X64-BMI2-NEXT: retq %adjcnt = xor i64 %cnt, 64 @@ -401,31 +399,31 @@ define i32 @sub31_shiftr32(i32 %val, i32 %cnt) nounwind { ; X86-NOBMI2-LABEL: sub31_shiftr32: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI2-NEXT: movb $31, %cl -; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: notb %cl ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: sub31_shiftr32: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movb $31, %al -; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: notb %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: sub31_shiftr32: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl %edi, %eax -; X64-NOBMI2-NEXT: movb $31, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: notb %cl +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shrl %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: sub31_shiftr32: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $31, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: shrxl %eax, %edi, %eax +; X64-BMI2-NEXT: notb %sil +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax ; X64-BMI2-NEXT: retq %adjcnt = sub i32 31, %cnt %result = lshr i32 %val, %adjcnt 
@@ -437,14 +435,14 @@ define i32 @xor31_shiftl32(i32 %val, i32 %cnt) nounwind { ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: xorb $31, %cl +; X86-NOBMI2-NEXT: notb %cl ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: xor31_shiftl32: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: xorb $31, %al +; X86-BMI2-NEXT: notb %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -452,14 +450,14 @@ define i32 @xor31_shiftl32(i32 %val, i32 %cnt) nounwind { ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl %edi, %eax -; X64-NOBMI2-NEXT: xorb $31, %cl +; X64-NOBMI2-NEXT: notb %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: xor31_shiftl32: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: xorb $31, %sil +; X64-BMI2-NEXT: notb %sil ; X64-BMI2-NEXT: shlxl %esi, %edi, %eax ; X64-BMI2-NEXT: retq %adjcnt = xor i32 %cnt, 31 @@ -471,31 +469,31 @@ define i32 @sub63_shiftr32(i32 %val, i32 %cnt) nounwind { ; X86-NOBMI2-LABEL: sub63_shiftr32: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI2-NEXT: movb $63, %cl -; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: notb %cl ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: sub63_shiftr32: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movb $63, %al -; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: notb %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; ; X64-NOBMI2-LABEL: sub63_shiftr32: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl %edi, %eax -; X64-NOBMI2-NEXT: movb $63, %cl -; X64-NOBMI2-NEXT: subb 
%sil, %cl +; X64-NOBMI2-NEXT: notb %cl +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shrl %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: sub63_shiftr32: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $63, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: shrxl %eax, %edi, %eax +; X64-BMI2-NEXT: notb %sil +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax ; X64-BMI2-NEXT: retq %adjcnt = sub i32 63, %cnt %result = lshr i32 %val, %adjcnt @@ -507,14 +505,14 @@ define i32 @xor63_shiftl32(i32 %val, i32 %cnt) nounwind { ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: xorb $63, %cl +; X86-NOBMI2-NEXT: notb %cl ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: xor63_shiftl32: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: xorb $63, %al +; X86-BMI2-NEXT: notb %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -522,14 +520,14 @@ define i32 @xor63_shiftl32(i32 %val, i32 %cnt) nounwind { ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl %edi, %eax -; X64-NOBMI2-NEXT: xorb $63, %cl +; X64-NOBMI2-NEXT: notb %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: xor63_shiftl32: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: xorb $63, %sil +; X64-BMI2-NEXT: notb %sil ; X64-BMI2-NEXT: shlxl %esi, %edi, %eax ; X64-BMI2-NEXT: retq %adjcnt = xor i32 %cnt, 63 @@ -540,16 +538,14 @@ define i32 @xor63_shiftl32(i32 %val, i32 %cnt) nounwind { define i32 @xor32_shiftr32(i32 %val, i32 %cnt) nounwind { ; X86-NOBMI2-LABEL: xor32_shiftr32: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: xorb $32, %cl +; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax ; 
X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: xor32_shiftr32: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: xorb $32, %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -557,14 +553,12 @@ define i32 @xor32_shiftr32(i32 %val, i32 %cnt) nounwind { ; X64-NOBMI2: # %bb.0: ; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl %edi, %eax -; X64-NOBMI2-NEXT: xorb $32, %cl ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shrl %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: xor32_shiftr32: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: xorb $32, %sil ; X64-BMI2-NEXT: shrxl %esi, %edi, %eax ; X64-BMI2-NEXT: retq %adjcnt = xor i32 %cnt, 32 -- 2.7.4