From 30837abd9623bf2c8582627d2179828ecf361965 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 8 Sep 2019 19:24:29 +0000 Subject: [PATCH] [X86] Teach materializeVectorConstant to not call getZeroVector/getOnesVector on the types we already have isel patterns for. llvm-svn: 371343 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +++--- llvm/test/CodeGen/X86/fold-load-vec.ll | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5e5ec5c..c784422 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9137,7 +9137,8 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG, if (ISD::isBuildVectorAllZeros(Op.getNode())) { // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd // and 2) ensure that i64 scalars are eliminated on x86-32 hosts. - if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) + if (VT.isFloatingPoint() || + VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) return Op; return getZeroVector(VT, Subtarget, DAG, DL); @@ -9147,8 +9148,7 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG, // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use // vpcmpeqd on 256-bit vectors. if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) { - if (VT == MVT::v4i32 || VT == MVT::v16i32 || - (VT == MVT::v8i32 && Subtarget.hasInt256())) + if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) return Op; return getOnesVector(VT, DAG, DL); diff --git a/llvm/test/CodeGen/X86/fold-load-vec.ll b/llvm/test/CodeGen/X86/fold-load-vec.ll index 115f2bf..e8dc8f2 100644 --- a/llvm/test/CodeGen/X86/fold-load-vec.ll +++ b/llvm/test/CodeGen/X86/fold-load-vec.ll @@ -12,7 +12,7 @@ define void @sample_test(<4 x float>* %source, <2 x float>* %dest) nounwind { ; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: xorps %xmm0, %xmm0 ; CHECK-NEXT: movlps %xmm0, (%rsp) -; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] +; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: movlps %xmm0, (%rsp) ; CHECK-NEXT: movlps %xmm0, (%rsi) ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax -- 2.7.4