AMDGPU: Fix scalar_to_vector for v4i16/v4f16

author Matt Arsenault <Matthew.Arsenault@amd.com>

Wed, 20 Jun 2018 19:45:48 +0000 (19:45 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Wed, 20 Jun 2018 19:45:48 +0000 (19:45 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 20 Jun 2018 19:45:48 +0000 (19:45 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 20 Jun 2018 19:45:48 +0000 (19:45 +0000)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

index 122939c..1ad10b3 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -493,9 +493,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
    case ISD::BUILD_VECTOR: {
      EVT VT = N->getValueType(0);
      unsigned NumVectorElts = VT.getVectorNumElements();
-
-    if (VT == MVT::v2i16 || VT == MVT::v2f16) {
-      if (Opc == ISD::BUILD_VECTOR) {
+    if (VT.getScalarSizeInBits() == 16) {
+      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
          uint32_t LHSVal, RHSVal;
          if (getConstantValue(N->getOperand(0), LHSVal) &&
              getConstantValue(N->getOperand(1), RHSVal)) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td

index 1a7e147..a3b4ea0 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1489,6 +1489,16 @@ def : GCNPat <
  //   (COPY $src0)
  // >;
  
+def : GCNPat <
+  (v4i16 (scalar_to_vector i16:$src0)),
+  (INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0)
+>;
+
+def : GCNPat <
+  (v4f16 (scalar_to_vector f16:$src0)),
+  (INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0)
+>;
+
  //===----------------------------------------------------------------------===//
  // Fract Patterns
  //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll

index 0f09fa1..ff634c6 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
@@ -29,6 +29,39 @@ define amdgpu_kernel void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out,
    ret void
  }
  
+; GCN-LABEL: {{^}}scalar_to_vector_v4i16:
+; VI: v_lshlrev_b16_e32
+; VI: v_lshlrev_b16_e32
+; VI: v_or_b32_e32
+; VI: v_lshlrev_b32
+; VI: v_or_b32_sdwa
+; VI: v_or_b32_sdwa
+define amdgpu_kernel void @scalar_to_vector_v4i16() {
+bb:
+  %tmp = load <2 x i8>, <2 x i8> addrspace(1)* undef, align 1
+  %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
+  store <8 x i8> %tmp2, <8 x i8> addrspace(1)* undef, align 8
+  ret void
+}
+
+; GCN-LABEL: {{^}}scalar_to_vector_v4f16:
+; VI: v_lshlrev_b16_e32
+; VI: v_lshlrev_b16_e32
+; VI: v_or_b32_e32
+; VI: v_lshlrev_b32
+; VI: v_or_b32_sdwa
+; VI: v_or_b32_sdwa
+define amdgpu_kernel void @scalar_to_vector_v4f16() {
+bb:
+  %load = load half, half addrspace(1)* undef, align 1
+  %tmp = bitcast half %load to <2 x i8>
+  %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
+  store <8 x i8> %tmp2, <8 x i8> addrspace(1)* undef, align 8
+  ret void
+}
+
  ; Getting a SCALAR_TO_VECTOR seems to be tricky. These cases managed
  ; to produce one, but for some reason never made it to selection.
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 20 Jun 2018 19:45:48 +0000 (19:45 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 20 Jun 2018 19:45:48 +0000 (19:45 +0000)
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp		patch | blob | history
llvm/lib/Target/AMDGPU/SIInstructions.td		patch | blob | history
llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll		patch | blob | history