// converts.
}
- // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
- bool isLE = DAG.getDataLayout().isLittleEndian();
- unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
- if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
- ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
+ if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST) {
+ // The vector index of the LSBs of the source depends on the endianness.
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+
+ // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
+ unsigned BCTruncElt = IsLE ? 0 : VT.getVectorNumElements() - 1;
SDValue BCSrc = InVec.getOperand(0);
- if (BCSrc.getValueType().isScalarInteger())
+ if (InVec.hasOneUse() && ConstEltNo->getZExtValue() == BCTruncElt &&
+ VT.isInteger() && BCSrc.getValueType().isScalarInteger())
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
}
ret i8 %ext
}
+; TODO: This should have folded to avoid vector ops, but the transform
+; is guarded by 'hasOneUse'. That limitation apparently makes some AMDGPU
+; codegen better.
+
define i8 @extractelt_bitcast_extra_use(i32 %x, <4 x i8>* %p) nounwind {
; X86-LABEL: extractelt_bitcast_extra_use:
; X86: # %bb.0: