From 857563eaf02f7aa3cc3748e2c36b45ae14294bf8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 4 Nov 2020 10:15:30 -0800 Subject: [PATCH] [RISCV] Check all 64-bits of the mask in SelectRORIW. We need to ensure the upper 32 bits of the mask are zero. So that the srl shifts zeroes into the lower 32 bits. Differential Revision: https://reviews.llvm.org/D90585 --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 16 ++++++++++------ llvm/test/CodeGen/RISCV/rv64Zbbp.ll | 28 ++++++++++++++++++---------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 853e81c..54219e9 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -381,10 +381,11 @@ bool RISCVDAGToDAGISel::SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt) { // Then we check that the constant operands respect these constraints: // // VC2 == 32 - VC1 -// VC3 == maskLeadingOnes(VC2) +// VC3 | maskTrailingOnes(VC1) == 0xffffffff // // being VC1 the Shamt we need, VC2 the complementary of Shamt over 32 -// and VC3 a 32 bit mask of (32 - VC1) leading ones. +// and VC3 being 0xffffffff after accounting for SimplifyDemandedBits removing +// some bits due to the right shift. bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) { if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && @@ -406,11 +407,14 @@ bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) { isa(Srl.getOperand(1)) && isa(Shl.getOperand(1)) && isa(And.getOperand(1))) { - uint32_t VC1 = Srl.getConstantOperandVal(1); - uint32_t VC2 = Shl.getConstantOperandVal(1); - uint32_t VC3 = And.getConstantOperandVal(1); + uint64_t VC1 = Srl.getConstantOperandVal(1); + uint64_t VC2 = Shl.getConstantOperandVal(1); + uint64_t VC3 = And.getConstantOperandVal(1); + // The mask needs to be 0xffffffff, but SimplifyDemandedBits may + // have removed lower bits that aren't necessary due to the right + // shift. if (VC2 == (32 - VC1) && - VC3 == maskLeadingOnes(VC2)) { + (VC3 | maskTrailingOnes(VC1)) == 0xffffffff) { RS1 = Shl.getOperand(0); Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N), Srl.getOperand(1).getValueType()); diff --git a/llvm/test/CodeGen/RISCV/rv64Zbbp.ll b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll index 063a695..a2a5458 100644 --- a/llvm/test/CodeGen/RISCV/rv64Zbbp.ll +++ b/llvm/test/CodeGen/RISCV/rv64Zbbp.ll @@ -378,7 +378,6 @@ define signext i32 @not_rori_i32(i32 signext %x, i32 signext %y) nounwind { ; This is similar to the type legalized roriw pattern, but the and mask is more ; than 32 bits so the lshr doesn't shift zeroes into the lower 32 bits. Make ; sure we don't match it to roriw. -; FIXME: We are currently truncating the mask to 32-bits before checking. define i64 @roriw_bug(i64 %x) nounwind { ; RV64I-LABEL: roriw_bug: ; RV64I: # %bb.0: @@ -392,23 +391,32 @@ define i64 @roriw_bug(i64 %x) nounwind { ; ; RV64IB-LABEL: roriw_bug: ; RV64IB: # %bb.0: -; RV64IB-NEXT: andi a1, a0, -2 -; RV64IB-NEXT: roriw a0, a0, 1 -; RV64IB-NEXT: xor a0, a1, a0 +; RV64IB-NEXT: slli a1, a0, 31 +; RV64IB-NEXT: andi a0, a0, -2 +; RV64IB-NEXT: srli a2, a0, 1 +; RV64IB-NEXT: or a1, a1, a2 +; RV64IB-NEXT: sext.w a1, a1 +; RV64IB-NEXT: xor a0, a0, a1 ; RV64IB-NEXT: ret ; ; RV64IBB-LABEL: roriw_bug: ; RV64IBB: # %bb.0: -; RV64IBB-NEXT: andi a1, a0, -2 -; RV64IBB-NEXT: roriw a0, a0, 1 -; RV64IBB-NEXT: xor a0, a1, a0 +; RV64IBB-NEXT: slli a1, a0, 31 +; RV64IBB-NEXT: andi a0, a0, -2 +; RV64IBB-NEXT: srli a2, a0, 1 +; RV64IBB-NEXT: or a1, a1, a2 +; RV64IBB-NEXT: sext.w a1, a1 +; RV64IBB-NEXT: xor a0, a0, a1 ; RV64IBB-NEXT: ret ; ; RV64IBP-LABEL: roriw_bug: ; RV64IBP: # %bb.0: -; RV64IBP-NEXT: andi a1, a0, -2 -; RV64IBP-NEXT: roriw a0, a0, 1 -; RV64IBP-NEXT: xor a0, a1, a0 +; RV64IBP-NEXT: slli a1, a0, 31 +; RV64IBP-NEXT: andi a0, a0, -2 +; RV64IBP-NEXT: srli a2, a0, 1 +; RV64IBP-NEXT: or a1, a1, a2 +; RV64IBP-NEXT: sext.w a1, a1 +; RV64IBP-NEXT: xor a0, a0, a1 ; RV64IBP-NEXT: ret %a = shl i64 %x, 31 %b = and i64 %x, 18446744073709551614 -- 2.7.4