[AVX512] Update X86InstrInfo::foldMemoryOperandCustom to handle the EVEX encoded VINSERTPS and VMOVHLPS instructions.

author Craig Topper <craig.topper@gmail.com>

Fri, 22 Jul 2016 05:00:35 +0000 (05:00 +0000)

committer Craig Topper <craig.topper@gmail.com>

Fri, 22 Jul 2016 05:00:35 +0000 (05:00 +0000)
author Craig Topper <craig.topper@gmail.com>
Fri, 22 Jul 2016 05:00:35 +0000 (05:00 +0000)
committer Craig Topper <craig.topper@gmail.com>
Fri, 22 Jul 2016 05:00:35 +0000 (05:00 +0000)
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp

index e64f454..052191a 100644 (file)
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5780,6 +5780,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
    switch (MI.getOpcode()) {
    case X86::INSERTPSrr:
    case X86::VINSERTPSrr:
+  case X86::VINSERTPSZrr:
      // Attempt to convert the load of inserted vector into a fold load
      // of a single float.
      if (OpNum == 2) {
@@ -5793,8 +5794,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
          int PtrOffset = SrcIdx * 4;
          unsigned NewImm = (DstIdx << 4) | ZMask;
          unsigned NewOpCode =
-            (MI.getOpcode() == X86::VINSERTPSrr ? X86::VINSERTPSrm
-                                                : X86::INSERTPSrm);
+            (MI.getOpcode() == X86::VINSERTPSZrr) ? X86::VINSERTPSZrm :
+            (MI.getOpcode() == X86::VINSERTPSrr)  ? X86::VINSERTPSrm  :
+                                                    X86::INSERTPSrm;
          MachineInstr *NewMI =
              FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset);
          NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm);
@@ -5804,6 +5806,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
      break;
    case X86::MOVHLPSrr:
    case X86::VMOVHLPSrr:
+  case X86::VMOVHLPSZrr:
      // Move the upper 64-bits of the second operand to the lower 64-bits.
      // To fold the load, adjust the pointer to the upper and use (V)MOVLPS.
      // TODO: In most cases AVX doesn't have a 8-byte alignment requirement.
@@ -5811,8 +5814,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
        unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize();
        if (Size <= RCSize && 8 <= Align) {
          unsigned NewOpCode =
-            (MI.getOpcode() == X86::VMOVHLPSrr ? X86::VMOVLPSrm
-                                               : X86::MOVLPSrm);
+            (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm :
+            (MI.getOpcode() == X86::VMOVHLPSrr)  ? X86::VMOVLPSrm     :
+                                                   X86::MOVLPSrm;
          MachineInstr *NewMI =
              FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8);
          return NewMI;
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll

index 3ab96e3..d79babf 100644 (file)
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll
@@ -68,6 +68,16 @@ define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
  }
  declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
  
+define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
+  ;CHECK-LABEL: stack_fold_insertps
+  ;CHECK:       vinsertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  ;CHECK-NEXT:                                                                              {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3]
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209)
+  ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
+
  define double @stack_fold_mulsd(double %a0, double %a1) {
    ;CHECK-LABEL: stack_fold_mulsd
    ;CHECK:       vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
author	Craig Topper <craig.topper@gmail.com>
	Fri, 22 Jul 2016 05:00:35 +0000 (05:00 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Fri, 22 Jul 2016 05:00:35 +0000 (05:00 +0000)
llvm/lib/Target/X86/X86InstrInfo.cpp		patch | blob | history
llvm/test/CodeGen/X86/stack-folding-fp-avx512vl.ll		patch | blob | history