X86 MMX: optimize transfer from mmx to i32

author Manman Ren <mren@apple.com>

Tue, 30 Oct 2012 22:15:38 +0000 (22:15 +0000)

committer Manman Ren <mren@apple.com>

Tue, 30 Oct 2012 22:15:38 +0000 (22:15 +0000)
author Manman Ren <mren@apple.com>
Tue, 30 Oct 2012 22:15:38 +0000 (22:15 +0000)
committer Manman Ren <mren@apple.com>
Tue, 30 Oct 2012 22:15:38 +0000 (22:15 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 4b6ab74..ce9b0a8 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14399,6 +14399,14 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
      return NewOp;
  
    SDValue InputVector = N->getOperand(0);
+  // Detect whether we are trying to convert from mmx to i32 and the bitcast
+  // from mmx to v2i32 has a single use.
+  if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
+      InputVector.getNode()->getOperand(0).getValueType() == MVT::x86mmx &&
+      InputVector.hasOneUse() && N->getValueType(0) == MVT::i32)
+    return DAG.getNode(X86ISD::MMX_MOVD2W, InputVector.getDebugLoc(),
+                       N->getValueType(0),
+                       InputVector.getNode()->getOperand(0));
  
    // Only operate on vectors of 4 elements, where the alternative shuffling
    // gets to be more expensive.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h

index 55c34ac..732c640 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -142,6 +142,10 @@ namespace llvm {
        /// mnemonic, so do I; blame Intel.
        MOVDQ2Q,
  
+      /// MMX_MOVD2W - Copies a 32-bit value from the low doubleword of an MMX
+      /// vector to a GPR.
+      MMX_MOVD2W,
+
        /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
        /// i32, corresponds to X86::PEXTRB.
        PEXTRB,
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td

index bd54858..127af6f 100644 (file)
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -207,8 +207,14 @@ def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
  let mayStore = 1 in
  def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
                          "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>;
-def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
-                        "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_REG_MM>;
+
+// Low doubleword (32 bits) of MMX to GPR.
+def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
+                            [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
+def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
+                         "movd\t{$src, $dst|$dst, $src}",
+                         [(set GR32:$dst,
+                          (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>;
  
  let neverHasSideEffects = 1 in
  def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
diff --git a/llvm/test/CodeGen/X86/mmx-builtins.ll b/llvm/test/CodeGen/X86/mmx-builtins.ll

index 8b7200d..a8d33f4 100644 (file)
--- a/llvm/test/CodeGen/X86/mmx-builtins.ll
+++ b/llvm/test/CodeGen/X86/mmx-builtins.ll
@@ -1043,6 +1043,20 @@ entry:
    ret i64 %5
  }
  
+define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: test21_2
+; CHECK: pshufw
+; CHECK: movd
+entry:
+  %0 = bitcast <1 x i64> %a to <4 x i16>
+  %1 = bitcast <4 x i16> %0 to x86_mmx
+  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
+  %3 = bitcast x86_mmx %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <2 x i32>
+  %5 = extractelement <2 x i32> %4, i32 0
+  ret i32 %5
+}
+
  declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
  
  define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
author	Manman Ren <mren@apple.com>
	Tue, 30 Oct 2012 22:15:38 +0000 (22:15 +0000)
committer	Manman Ren <mren@apple.com>
	Tue, 30 Oct 2012 22:15:38 +0000 (22:15 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
llvm/lib/Target/X86/X86InstrMMX.td		patch \| blob \| history
llvm/test/CodeGen/X86/mmx-builtins.ll		patch \| blob \| history