From: Yingchi Long
Date: Fri, 9 Dec 2022 13:05:37 +0000 (+0800)
Subject: [RISCV][VP] expand vp intrinsics if no +zve32x feature
X-Git-Tag: upstream/17.0.6~21082
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6c09a4e5ba2e4197ec6acdb448cd405002ce08f5;p=platform%2Fupstream%2Fllvm.git

[RISCV][VP] expand vp intrinsics if no +zve32x feature

If the subtarget does not support VInstructions, expand vp intrinsics to
scalar instructions.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D139706
---

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index badb840..c4cc798 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -242,10 +242,11 @@ public:
   TargetTransformInfo::VPLegalization
   getVPLegalizationStrategy(const VPIntrinsic &PI) const {
     using VPLegalization = TargetTransformInfo::VPLegalization;
-    if (PI.getIntrinsicID() == Intrinsic::vp_reduce_mul &&
-        cast<VectorType>(PI.getArgOperand(1)->getType())
-            ->getElementType()
-            ->getIntegerBitWidth() != 1)
+    if (!ST->hasVInstructions() ||
+        (PI.getIntrinsicID() == Intrinsic::vp_reduce_mul &&
+         cast<VectorType>(PI.getArgOperand(1)->getType())
+             ->getElementType()
+             ->getIntegerBitWidth() != 1))
       return VPLegalization(VPLegalization::Discard, VPLegalization::Convert);
     return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll b/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
new file mode 100644
index 0000000..b1d52a6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
+; Should expand to scalar instructions and not crash
+
+declare i32 @llvm.vp.reduce.add.v4i32(i32, <4 x i32>, <4 x i1>, i32)
+
+define i32 @vpreduce_add_v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) {
+; RV32-LABEL: vpreduce_add_v4i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lw a4, 4(a1)
+; RV32-NEXT:    lw a5, 12(a1)
+; RV32-NEXT:    lw a6, 8(a1)
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    lw a7, 0(a2)
+; RV32-NEXT:    lw t0, 8(a2)
+; RV32-NEXT:    lw t1, 12(a2)
+; RV32-NEXT:    lw a2, 4(a2)
+; RV32-NEXT:    snez t2, a3
+; RV32-NEXT:    sltiu t3, a3, 3
+; RV32-NEXT:    xori t3, t3, 1
+; RV32-NEXT:    sltiu t4, a3, 4
+; RV32-NEXT:    xori t4, t4, 1
+; RV32-NEXT:    sltiu a3, a3, 2
+; RV32-NEXT:    xori a3, a3, 1
+; RV32-NEXT:    and a2, a3, a2
+; RV32-NEXT:    and a3, t4, t1
+; RV32-NEXT:    and t0, t3, t0
+; RV32-NEXT:    and a7, t2, a7
+; RV32-NEXT:    neg a7, a7
+; RV32-NEXT:    and a1, a7, a1
+; RV32-NEXT:    neg a7, t0
+; RV32-NEXT:    and a6, a7, a6
+; RV32-NEXT:    neg a3, a3
+; RV32-NEXT:    and a3, a3, a5
+; RV32-NEXT:    neg a2, a2
+; RV32-NEXT:    and a2, a2, a4
+; RV32-NEXT:    add a2, a2, a3
+; RV32-NEXT:    add a2, a6, a2
+; RV32-NEXT:    add a0, a2, a0
+; RV32-NEXT:    add a0, a1, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpreduce_add_v4i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lw a4, 8(a1)
+; RV64-NEXT:    lw a5, 24(a1)
+; RV64-NEXT:    lw a6, 16(a1)
+; RV64-NEXT:    lw a1, 0(a1)
+; RV64-NEXT:    ld a7, 0(a2)
+; RV64-NEXT:    ld t0, 16(a2)
+; RV64-NEXT:    ld t1, 24(a2)
+; RV64-NEXT:    ld a2, 8(a2)
+; RV64-NEXT:    sext.w a3, a3
+; RV64-NEXT:    snez t2, a3
+; RV64-NEXT:    sltiu t3, a3, 3
+; RV64-NEXT:    xori t3, t3, 1
+; RV64-NEXT:    sltiu t4, a3, 4
+; RV64-NEXT:    xori t4, t4, 1
+; RV64-NEXT:    sltiu a3, a3, 2
+; RV64-NEXT:    xori a3, a3, 1
+; RV64-NEXT:    and a2, a3, a2
+; RV64-NEXT:    and a3, t4, t1
+; RV64-NEXT:    and t0, t3, t0
+; RV64-NEXT:    and a7, t2, a7
+; RV64-NEXT:    negw a7, a7
+; RV64-NEXT:    and a1, a7, a1
+; RV64-NEXT:    negw a7, t0
+; RV64-NEXT:    and a6, a7, a6
+; RV64-NEXT:    negw a3, a3
+; RV64-NEXT:    and a3, a3, a5
+; RV64-NEXT:    negw a2, a2
+; RV64-NEXT:    and a2, a2, a4
+; RV64-NEXT:    add a2, a2, a3
+; RV64-NEXT:    add a2, a6, a2
+; RV64-NEXT:    add a0, a2, a0
+; RV64-NEXT:    addw a0, a1, a0
+; RV64-NEXT:    ret
+  %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
+  ret i32 %r
+}
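
The scalar code in the checks above has to reproduce the llvm.vp.reduce.add contract: the start value %s is always included in the result, and a vector lane contributes to the sum only when its index is below %evl and its mask bit in %m is set. A minimal stand-alone C++ model of that contract is sketched below; the helper name and the test values are illustrative only, not LLVM code.

// Sketch of the semantics that the expansion must preserve for
// @llvm.vp.reduce.add.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl).
// The function name and values are made up for illustration.
#include <cstdint>
#include <cstdio>

static int32_t vp_reduce_add_v4i32(int32_t Start, const int32_t V[4],
                                   const bool M[4], uint32_t EVL) {
  int32_t Sum = Start; // the start operand is always part of the result
  for (uint32_t I = 0; I < 4; ++I) {
    // A lane is active only if its index is below EVL and its mask bit is
    // set; inactive lanes contribute the neutral element 0.
    bool Active = I < EVL && M[I];
    Sum += Active ? V[I] : 0;
  }
  return Sum;
}

int main() {
  const int32_t V[4] = {1, 2, 3, 4};
  const bool M[4] = {true, false, true, true};
  // EVL = 3 limits the reduction to lanes 0..2; lane 1 is masked off,
  // so the result is 10 + 1 + 3 = 14.
  printf("%d\n", vp_reduce_add_v4i32(10, V, M, 3));
  return 0;
}

The snez/sltiu/xori sequences in the generated assembly build this per-lane Active predicate from %evl, the and/neg (negw on RV64) pairs turn it into a 0 or all-ones lane value, and the final adds accumulate the surviving elements into %s without any branches.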