[AVX-512] Lower AVX cvtpd2ps intrinsic to ISD::FP_ROUND so it can use EVEX instructio...
authorCraig Topper <craig.topper@gmail.com>
Sun, 6 Nov 2016 04:12:46 +0000 (04:12 +0000)
committerCraig Topper <craig.topper@gmail.com>
Sun, 6 Nov 2016 04:12:46 +0000 (04:12 +0000)
llvm-svn: 286057

llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86InstrSSE.td
llvm/lib/Target/X86/X86IntrinsicsInfo.h
llvm/test/CodeGen/X86/avx-intrinsics-x86.ll

index dd5d195..a8f2a20 100644 (file)
@@ -18062,7 +18062,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
                                               Src1, Src2, Src3, Src4),
                                   Mask, PassThru, Subtarget, DAG);
     }
-    case CVTPD2PS: {
+    case CVTPD2PS:
+      // ISD::FP_ROUND has a second argument that indicates if the truncation
+      // does not change the value. Set it to 0 since it can change.
+      return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
+                         DAG.getIntPtrConstant(0, dl));
+    case CVTPD2PS_MASK: {
       SDValue Src = Op.getOperand(1);
       SDValue PassThru = Op.getOperand(2);
       SDValue Mask = Op.getOperand(3);
index dbcf0d7..3e83f69 100644 (file)
@@ -2228,16 +2228,16 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         [], IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
 
 // YMM only
+let Predicates = [HasAVX, NoVLX] in {
 def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                         "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst,
-                          (int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
+                        [(set VR128:$dst, (fpround VR256:$src))],
                         IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
 def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                         "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
-                        [(set VR128:$dst,
-                          (int_x86_avx_cvt_pd2_ps_256 (loadv4f64 addr:$src)))],
+                        [(set VR128:$dst, (fpround (loadv4f64 addr:$src)))],
                         IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
+}
 def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
                 (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
 
@@ -2261,10 +2261,6 @@ let Predicates = [HasAVX, NoVLX] in {
             (VCVTPD2PSrr VR128:$src)>;
   def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))),
             (VCVTPD2PSXrm addr:$src)>;
-  def : Pat<(v4f32 (fpround (v4f64 VR256:$src))),
-            (VCVTPD2PSYrr VR256:$src)>;
-  def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
-            (VCVTPD2PSYrm addr:$src)>;
 
   def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
             (VCVTPS2PDrr VR128:$src)>;
index 7565471..fd8e397 100644 (file)
@@ -23,7 +23,8 @@ enum IntrinsicType : uint16_t {
   INTR_NO_TYPE,
   GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASS, FPCLASSS,
   INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
-  CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, CVTPD2PS,
+  CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
+  CVTPD2PS, CVTPD2PS_MASK,
   INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
   INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK,
   INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
@@ -251,6 +252,7 @@ static const IntrinsicData* getIntrinsicWithChain(uint16_t IntNo) {
  * the alphabetical order.
  */
 static const IntrinsicData  IntrinsicsWithoutChain[] = {
+  X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0),
   X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
   X86_INTRINSIC_DATA(avx_hadd_pd_256,   INTR_TYPE_2OP, X86ISD::FHADD, 0),
   X86_INTRINSIC_DATA(avx_hadd_ps_256,   INTR_TYPE_2OP, X86ISD::FHADD, 0),
@@ -503,9 +505,9 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                     X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps,     INTR_TYPE_1OP_MASK,
                     X86ISD::VFPROUND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, CVTPD2PS,
+  X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, CVTPD2PS_MASK,
                      ISD::FP_ROUND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS,
+  X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_MASK,
                      ISD::FP_ROUND, X86ISD::VFPROUND_RND),
   X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTP2SI, 0),
index a6835ad..da1df98 100644 (file)
@@ -2824,7 +2824,7 @@ define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
 ;
 ; AVX512VL-LABEL: test_x86_avx_cvt_pd2_ps_256:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vcvtpd2psy %ymm0, %xmm0 ## encoding: [0xc5,0xfd,0x5a,0xc0]
+; AVX512VL-NEXT:    vcvtpd2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x5a,0xc0]
 ; AVX512VL-NEXT:    retl ## encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res