From 7cd725858b4b67ca44a8d34483c28b5e12a4c733 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 29 Dec 2022 16:33:18 -0800 Subject: [PATCH] [RISCV] Teach RISCVDAGToDAGISel::selectShiftMask to handle shift by (sub size-1, X). If the shift amount is (sub C, X) where C is -1 modulo the size of the shift, we can replace the sub with a NOT. We could also use XORI X, size-1, but NOT would work better with c.not from the future Zce extension. --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 11 ++ llvm/test/CodeGen/RISCV/shift-amount-mod.ll | 284 ++++++++++++++++++++++++++++ 2 files changed, 295 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/shift-amount-mod.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index b147430..45c6620 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -2113,6 +2113,17 @@ bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, ShAmt = SDValue(Neg, 0); return true; } + // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X + // to generate a NOT instead of a SUB of a constant. 
+ if (Imm % ShiftWidth == ShiftWidth - 1) { + SDLoc DL(ShAmt); + EVT VT = ShAmt.getValueType(); + MachineSDNode *Not = + CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1), + CurDAG->getTargetConstant(-1, DL, VT)); + ShAmt = SDValue(Not, 0); + return true; + } } return true; diff --git a/llvm/test/CodeGen/RISCV/shift-amount-mod.ll b/llvm/test/CodeGen/RISCV/shift-amount-mod.ll new file mode 100644 index 0000000..ef69758 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/shift-amount-mod.ll @@ -0,0 +1,284 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I + +define i32 @shl_by_complemented_32(i32 %x) { +; RV32I-LABEL: shl_by_complemented_32: +; RV32I: # %bb.0: +; RV32I-NEXT: not a1, a0 +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: shl_by_complemented_32: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a0 +; RV64I-NEXT: sllw a0, a0, a1 +; RV64I-NEXT: ret + %a = sub i32 31, %x + %b = shl i32 %x, %a + ret i32 %b +} + +define i64 @shl_by_complemented_64(i64 %x) { +; RV32I-LABEL: shl_by_complemented_64: +; RV32I: # %bb.0: +; RV32I-NEXT: not a4, a0 +; RV32I-NEXT: li a3, 31 +; RV32I-NEXT: sub a3, a3, a0 +; RV32I-NEXT: sll a2, a0, a4 +; RV32I-NEXT: bltz a3, .LBB1_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: j .LBB1_3 +; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: sll a1, a1, a4 +; RV32I-NEXT: li a4, 63 +; RV32I-NEXT: sub a4, a4, a0 +; RV32I-NEXT: xori a4, a4, 31 +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: srl a0, a0, a4 +; RV32I-NEXT: or a1, a1, a0 +; RV32I-NEXT: .LBB1_3: +; RV32I-NEXT: slti a0, a3, 0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: shl_by_complemented_64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a0 +; RV64I-NEXT: sll a0, a0, a1 +; RV64I-NEXT: ret + 
%a = sub i64 63, %x + %b = shl i64 %x, %a + ret i64 %b +} + +define i32 @lshr_by_complemented_32(i32 %x) { +; RV32I-LABEL: lshr_by_complemented_32: +; RV32I: # %bb.0: +; RV32I-NEXT: not a1, a0 +; RV32I-NEXT: srl a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: lshr_by_complemented_32: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a0 +; RV64I-NEXT: srlw a0, a0, a1 +; RV64I-NEXT: ret + %a = sub i32 31, %x + %b = lshr i32 %x, %a + ret i32 %b +} + +define i64 @lshr_by_complemented_64(i64 %x) { +; RV32I-LABEL: lshr_by_complemented_64: +; RV32I: # %bb.0: +; RV32I-NEXT: not a4, a0 +; RV32I-NEXT: li a3, 31 +; RV32I-NEXT: sub a3, a3, a0 +; RV32I-NEXT: srl a2, a1, a4 +; RV32I-NEXT: bltz a3, .LBB3_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: j .LBB3_3 +; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: srl a4, a0, a4 +; RV32I-NEXT: li a5, 63 +; RV32I-NEXT: sub a5, a5, a0 +; RV32I-NEXT: xori a0, a5, 31 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: sll a0, a1, a0 +; RV32I-NEXT: or a0, a4, a0 +; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: slti a1, a3, 0 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: lshr_by_complemented_64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a0 +; RV64I-NEXT: srl a0, a0, a1 +; RV64I-NEXT: ret + %a = sub i64 63, %x + %b = lshr i64 %x, %a + ret i64 %b +} + +define i32 @ashr_by_complemented_32(i32 %x) { +; RV32I-LABEL: ashr_by_complemented_32: +; RV32I: # %bb.0: +; RV32I-NEXT: not a1, a0 +; RV32I-NEXT: sra a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ashr_by_complemented_32: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a0 +; RV64I-NEXT: sraw a0, a0, a1 +; RV64I-NEXT: ret + %a = sub i32 31, %x + %b = ashr i32 %x, %a + ret i32 %b +} + +define i64 @ashr_by_complemented_64(i64 %x) { +; RV32I-LABEL: ashr_by_complemented_64: +; RV32I: # %bb.0: +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: li a0, 31 +; RV32I-NEXT: sub a4, a0, a2 +; RV32I-NEXT: not a3, a2 +; RV32I-NEXT: sra a0, a1, a3 +; RV32I-NEXT: bltz a4, .LBB5_2 +; 
RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srai a1, a1, 31 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB5_2: +; RV32I-NEXT: srl a3, a2, a3 +; RV32I-NEXT: li a4, 63 +; RV32I-NEXT: sub a4, a4, a2 +; RV32I-NEXT: xori a2, a4, 31 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: sll a1, a1, a2 +; RV32I-NEXT: or a3, a3, a1 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ashr_by_complemented_64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a0 +; RV64I-NEXT: sra a0, a0, a1 +; RV64I-NEXT: ret + %a = sub i64 63, %x + %b = ashr i64 %x, %a + ret i64 %b +} + +define i32 @shl_by_masked_complemented_32(i32 %x) { +; RV32I-LABEL: shl_by_masked_complemented_32: +; RV32I: # %bb.0: +; RV32I-NEXT: not a1, a0 +; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: shl_by_masked_complemented_32: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a0 +; RV64I-NEXT: sllw a0, a0, a1 +; RV64I-NEXT: ret + %a = sub i32 31, %x + %b = and i32 %a, 31 + %c = shl i32 %x, %b + ret i32 %c +} + +define i64 @shl_by_masked_complemented_64(i64 %x) { +; RV32I-LABEL: shl_by_masked_complemented_64: +; RV32I: # %bb.0: +; RV32I-NEXT: li a2, 63 +; RV32I-NEXT: sub a2, a2, a0 +; RV32I-NEXT: andi a4, a2, 63 +; RV32I-NEXT: addi a2, a4, -32 +; RV32I-NEXT: not a3, a0 +; RV32I-NEXT: bltz a2, .LBB7_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sll a1, a0, a4 +; RV32I-NEXT: j .LBB7_3 +; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: sll a1, a1, a3 +; RV32I-NEXT: xori a4, a4, 31 +; RV32I-NEXT: srli a5, a0, 1 +; RV32I-NEXT: srl a4, a5, a4 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: .LBB7_3: +; RV32I-NEXT: sll a0, a0, a3 +; RV32I-NEXT: slti a2, a2, 0 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: shl_by_masked_complemented_64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a0 +; RV64I-NEXT: sll a0, a0, a1 +; RV64I-NEXT: ret + %a = sub i64 63, %x + %b = and i64 %a, 63 + %c = shl i64 %x, %b + ret i64 %c +} + +define i64 @lshr_by_masked_complemented_64(i64 %x) { +; RV32I-LABEL: 
lshr_by_masked_complemented_64: +; RV32I: # %bb.0: +; RV32I-NEXT: li a2, 63 +; RV32I-NEXT: sub a2, a2, a0 +; RV32I-NEXT: andi a4, a2, 63 +; RV32I-NEXT: addi a2, a4, -32 +; RV32I-NEXT: not a3, a0 +; RV32I-NEXT: bltz a2, .LBB8_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srl a0, a1, a4 +; RV32I-NEXT: j .LBB8_3 +; RV32I-NEXT: .LBB8_2: +; RV32I-NEXT: srl a0, a0, a3 +; RV32I-NEXT: xori a4, a4, 31 +; RV32I-NEXT: slli a5, a1, 1 +; RV32I-NEXT: sll a4, a5, a4 +; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: .LBB8_3: +; RV32I-NEXT: srl a1, a1, a3 +; RV32I-NEXT: slti a2, a2, 0 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: lshr_by_masked_complemented_64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a0 +; RV64I-NEXT: srl a0, a0, a1 +; RV64I-NEXT: ret + %a = sub i64 63, %x + %b = and i64 %a, 63 + %c = lshr i64 %x, %b + ret i64 %c +} + +define i64 @ashr_by_masked_complemented_64(i64 %x) { +; RV32I-LABEL: ashr_by_masked_complemented_64: +; RV32I: # %bb.0: +; RV32I-NEXT: li a2, 63 +; RV32I-NEXT: sub a2, a2, a0 +; RV32I-NEXT: andi a3, a2, 63 +; RV32I-NEXT: addi a4, a3, -32 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bltz a4, .LBB9_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srai a1, a2, 31 +; RV32I-NEXT: sra a0, a2, a3 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB9_2: +; RV32I-NEXT: not a4, a0 +; RV32I-NEXT: sra a1, a2, a4 +; RV32I-NEXT: srl a0, a0, a4 +; RV32I-NEXT: xori a3, a3, 31 +; RV32I-NEXT: slli a2, a2, 1 +; RV32I-NEXT: sll a2, a2, a3 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: ashr_by_masked_complemented_64: +; RV64I: # %bb.0: +; RV64I-NEXT: not a1, a0 +; RV64I-NEXT: sra a0, a0, a1 +; RV64I-NEXT: ret + %a = sub i64 63, %x + %b = and i64 %a, 63 + %c = ashr i64 %x, %b + ret i64 %c +} -- 2.7.4