From 5a4ec8127f7743f83dd17b2ef384958d54c4c95c Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Wed, 20 Jun 2018 19:45:48 +0000
Subject: [PATCH] AMDGPU: Fix scalar_to_vector for v4i16/v4f16

llvm-svn: 335161
---
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |  5 ++--
 llvm/lib/Target/AMDGPU/SIInstructions.td      | 10 ++++++++
 llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll  | 33 +++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 122939c..1ad10b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -493,9 +493,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
   case ISD::BUILD_VECTOR: {
     EVT VT = N->getValueType(0);
     unsigned NumVectorElts = VT.getVectorNumElements();
-
-    if (VT == MVT::v2i16 || VT == MVT::v2f16) {
-      if (Opc == ISD::BUILD_VECTOR) {
+    if (VT.getScalarSizeInBits() == 16) {
+      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
         uint32_t LHSVal, RHSVal;
         if (getConstantValue(N->getOperand(0), LHSVal) &&
             getConstantValue(N->getOperand(1), RHSVal)) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 1a7e147..a3b4ea0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1489,6 +1489,16 @@ def : GCNPat <
 // (COPY $src0)
 // >;
 
+def : GCNPat <
+  (v4i16 (scalar_to_vector i16:$src0)),
+  (INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0)
+>;
+
+def : GCNPat <
+  (v4f16 (scalar_to_vector f16:$src0)),
+  (INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0)
+>;
+
 //===----------------------------------------------------------------------===//
 // Fract Patterns
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
index 0f09fa1..ff634c6 100644
--- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
@@ -29,6 +29,39 @@ define amdgpu_kernel void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out,
   ret void
 }
 
+; GCN-LABEL: {{^}}scalar_to_vector_v4i16:
+; VI: v_lshlrev_b16_e32
+; VI: v_lshlrev_b16_e32
+; VI: v_or_b32_e32
+; VI: v_lshlrev_b32
+; VI: v_or_b32_sdwa
+; VI: v_or_b32_sdwa
+define amdgpu_kernel void @scalar_to_vector_v4i16() {
+bb:
+  %tmp = load <2 x i8>, <2 x i8> addrspace(1)* undef, align 1
+  %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32>
+  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32>
+  store <8 x i8> %tmp2, <8 x i8> addrspace(1)* undef, align 8
+  ret void
+}
+
+; GCN-LABEL: {{^}}scalar_to_vector_v4f16:
+; VI: v_lshlrev_b16_e32
+; VI: v_lshlrev_b16_e32
+; VI: v_or_b32_e32
+; VI: v_lshlrev_b32
+; VI: v_or_b32_sdwa
+; VI: v_or_b32_sdwa
+define amdgpu_kernel void @scalar_to_vector_v4f16() {
+bb:
+  %load = load half, half addrspace(1)* undef, align 1
+  %tmp = bitcast half %load to <2 x i8>
+  %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32>
+  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32>
+  store <8 x i8> %tmp2, <8 x i8> addrspace(1)* undef, align 8
+  ret void
+}
+
 ; Getting a SCALAR_TO_VECTOR seems to be tricky. These cases managed
 ; to produce one, but for some reason never made it to selection.
 
-- 
2.7.4