From 90fd0622b63d2c7addb8b97d98e134ea63d3f037 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 1 Mar 2018 22:22:31 +0000 Subject: [PATCH] [X86][MMX] Improve handling of 64-bit MMX constants 64-bit MMX constant generation usually ends up lowering into SSE instructions before being spilled/reloaded as a MMX type. This patch bitcasts the constant to a double value to allow correct loading directly to the MMX register. I've added MMX constant asm comment support to improve testing, it's better to always print the double values as hex constants as MMX is mainly an integer unit (and even with 3DNow! it's just floats). Differential Revision: https://reviews.llvm.org/D43616 llvm-svn: 326497 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 17 +++++++++++------ llvm/lib/Target/X86/X86MCInstLower.cpp | 18 ++++++++++++++++++ llvm/test/CodeGen/X86/fast-isel-bc.ll | 17 ++--------------- llvm/test/CodeGen/X86/vector-shuffle-mmx.ll | 11 ++--------- 4 files changed, 33 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8921bd4..56c6bd9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30833,14 +30833,19 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, // it's better to handle them early to be sure we emit efficient code by // avoiding store-load conversions. if (VT == MVT::x86mmx) { - // Detect zero-extended MMX constant vectors. + // Detect MMX constant vectors. APInt UndefElts; - SmallVector EltBits; - if (getTargetConstantBitsFromNode(N0, 32, UndefElts, EltBits) && - EltBits[1] == 0) { + SmallVector EltBits; + if (getTargetConstantBitsFromNode(N0, 64, UndefElts, EltBits)) { SDLoc DL(N0); - return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT, - DAG.getConstant(EltBits[0], DL, MVT::i32)); + // Handle zero-extension of i32 with MOVD. 
+ if (EltBits[0].countLeadingZeros() >= 32) + return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT, + DAG.getConstant(EltBits[0].trunc(32), DL, MVT::i32)); + // Else, bitcast to a double. + // TODO - investigate supporting sext 32-bit immediates on x86_64. + APFloat F64(APFloat::IEEEdouble(), EltBits[0]); + return DAG.getBitcast(VT, DAG.getConstantFP(F64, DL, MVT::f64)); } // Detect bitcasts to x86mmx low word. diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 730ba745..f5f87c1 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1822,6 +1822,24 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { break; } + case X86::MMX_MOVQ64rm: { + if (!OutStreamer->isVerboseAsm()) + break; + if (MI->getNumOperands() <= 4) + break; + if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) { + std::string Comment; + raw_string_ostream CS(Comment); + const MachineOperand &DstOp = MI->getOperand(0); + CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; + if (auto *CF = dyn_cast(C)) { + CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false); + OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); + } + } + break; + } + #define MOV_CASE(Prefix, Suffix) \ case X86::Prefix##MOVAPD##Suffix##rm: \ case X86::Prefix##MOVAPS##Suffix##rm: \ diff --git a/llvm/test/CodeGen/X86/fast-isel-bc.ll b/llvm/test/CodeGen/X86/fast-isel-bc.ll index 3bc84c8..3287f99 100644 --- a/llvm/test/CodeGen/X86/fast-isel-bc.ll +++ b/llvm/test/CodeGen/X86/fast-isel-bc.ll @@ -7,19 +7,12 @@ declare void @func2(x86_mmx) ; This isn't spectacular, but it's MMX code at -O0... -; For now, handling of x86_mmx parameters in fast Isel is unimplemented, -; so we get pretty poor code. The below is preferable. 
-; CHEK: movl $2, %eax -; CHEK: movd %rax, %mm0 -; CHEK: movd %mm0, %rdi define void @func1() nounwind { ; X86-LABEL: func1: ; X86: ## %bb.0: ; X86-NEXT: subl $12, %esp -; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-NEXT: movsd %xmm0, (%esp) -; X86-NEXT: movq (%esp), %mm0 +; X86-NEXT: movq LCPI0_0, %mm0 ## mm0 = 0x200000000 ; X86-NEXT: calll _func2 ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl @@ -27,13 +20,7 @@ define void @func1() nounwind { ; X64-LABEL: func1: ; X64: ## %bb.0: ; X64-NEXT: pushq %rax -; X64-NEXT: movl $2, %eax -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movq %rcx, %xmm0 -; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] -; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X64-NEXT: movq %xmm0, (%rsp) -; X64-NEXT: movq (%rsp), %mm0 +; X64-NEXT: movq {{.*}}(%rip), %mm0 ## mm0 = 0x200000000 ; X64-NEXT: movq2dq %mm0, %xmm0 ; X64-NEXT: callq _func2 ; X64-NEXT: popq %rax diff --git a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll index a2f6ecc..a00df14 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll @@ -33,25 +33,18 @@ define void @test1() { ; X32: ## %bb.0: ## %entry ; X32-NEXT: pushl %edi ; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: subl $8, %esp -; X32-NEXT: .cfi_def_cfa_offset 16 ; X32-NEXT: .cfi_offset %edi, -8 ; X32-NEXT: pxor %mm0, %mm0 -; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: movsd %xmm0, (%esp) -; X32-NEXT: movq (%esp), %mm1 +; X32-NEXT: movq LCPI1_0, %mm1 ## mm1 = 0x7070606040400000 ; X32-NEXT: xorl %edi, %edi ; X32-NEXT: maskmovq %mm1, %mm0 -; X32-NEXT: addl $8, %esp ; X32-NEXT: popl %edi ; X32-NEXT: retl ; ; X64-LABEL: test1: ; X64: ## %bb.0: ## %entry ; X64-NEXT: pxor %mm0, %mm0 -; X64-NEXT: movq {{.*}}(%rip), %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm1 +; X64-NEXT: movq {{.*}}(%rip), %mm1 ## mm1 = 0x7070606040400000 ; 
X64-NEXT: xorl %edi, %edi ; X64-NEXT: maskmovq %mm1, %mm0 ; X64-NEXT: retq -- 2.7.4