DAGCombiner: Turn extract of bitcasted integer into truncate

author Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 1 Mar 2016 18:01:37 +0000 (18:01 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 1 Mar 2016 18:01:37 +0000 (18:01 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 1 Mar 2016 18:01:37 +0000 (18:01 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 1 Mar 2016 18:01:37 +0000 (18:01 +0000)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 3346ea7..d2d48ea 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12180,6 +12180,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
      // converts.
    }
  
+  // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
+  if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
+      ConstEltNo->isNullValue()) {
+    SDValue BCSrc = InVec.getOperand(0);
+    if (BCSrc.getValueType().isScalarInteger())
+      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
+  }
+
    // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
    // We only perform this optimization before the op legalization phase because
    // we may introduce new vector instructions which are not backed by TD
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 58de94c..5fa7f73 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24850,13 +24850,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
    if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
        N->getValueType(0) == MVT::i32 &&
        InputVector.getValueType() == MVT::v2i32) {
-
-    // The bitcast source is a direct mmx result.
      SDValue MMXSrc = InputVector.getNode()->getOperand(0);
-    if (MMXSrc.getValueType() == MVT::x86mmx)
-      return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
-                         N->getValueType(0),
-                         InputVector.getNode()->getOperand(0));
  
      // The mmx is indirect: (i64 extract_elt (v1i64 bitcast (x86mmx ...))).
      if (MMXSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT && MMXSrc.hasOneUse() &&
@@ -27940,11 +27934,22 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
  
  static SDValue PerformTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
+
+  SDValue Src = N->getOperand(0);
+
    // Try to detect AVG pattern first.
-  if (SDValue Avg = detectAVGPattern(N->getOperand(0), N->getValueType(0), DAG,
+  if (SDValue Avg = detectAVGPattern(Src, N->getValueType(0), DAG,
                                       Subtarget, SDLoc(N)))
      return Avg;
  
+  // The bitcast source is a direct mmx result.
+  // Detect a truncate to i32 of a bitcast from x86mmx.
+  if (Src.getOpcode() == ISD::BITCAST && N->getValueType(0) == MVT::i32) {
+    SDValue BCSrc = Src.getOperand(0);
+    if (BCSrc.getValueType() == MVT::x86mmx)
+      return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(N), MVT::i32, BCSrc);
+  }
+
    return combineVectorTruncation(N, DAG, Subtarget);
  }
  
diff --git a/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll b/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll

new file mode 100644 (file)

index 0000000..57a7c52
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; Make sure the add and load are reduced to 32-bits even with the
+; bitcast to vector.
+; GCN-LABEL: {{^}}bitcast_int_to_vector_extract_0:
+; GCN-DAG: s_load_dword [[B:s[0-9]+]]
+; GCN-DAG: buffer_load_dword [[A:v[0-9]+]]
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, [[B]], [[A]]
+; GCN: buffer_store_dword [[ADD]]
+define void @bitcast_int_to_vector_extract_0(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %b) {
+   %a = load i64, i64 addrspace(1)* %in
+   %add = add i64 %a, %b
+   %val.bc = bitcast i64 %add to <2 x i32>
+   %extract = extractelement <2 x i32> %val.bc, i32 0
+   store i32 %extract, i32 addrspace(1)* %out
+  ret void
+}
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 1 Mar 2016 18:01:37 +0000 (18:01 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 1 Mar 2016 18:01:37 +0000 (18:01 +0000)
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll	[new file with mode: 0644]	patch | blob