From cdcbeb4997a85895266b688b6077c48fbe1c4085 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 2 Nov 2018 11:06:18 +0000 Subject: [PATCH] [DAGCombiner] Remove reduceBuildVecConvertToConvertBuildVec and rely on the vectorizers instead (PR35732) reduceBuildVecConvertToConvertBuildVec vectorizes int2float in the DAGCombiner, which means that even if the LV/SLP vectorizers have decided to keep scalar code based on their cost models, this combine will override that decision. While there are cases where vectorization is necessary in the DAG (mainly due to legalization artefacts), I don't think this is the case here; we should assume that the vectorizers know what they are doing. Differential Revision: https://reviews.llvm.org/D53712 llvm-svn: 345964 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 75 ---------- llvm/test/CodeGen/ARM/vdup.ll | 37 ++--- llvm/test/CodeGen/Mips/cconv/vector.ll | 151 +++++++++++++++------ llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll | 22 +-- llvm/test/CodeGen/X86/cvtv2f32.ll | 31 +++-- 5 files changed, 155 insertions(+), 161 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 03145c5..8c2f9e8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -464,7 +464,6 @@ namespace { SDValue TransformFPLoadStorePair(SDNode *N); SDValue convertBuildVecZextToZext(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); - SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef VectorMask, SDValue VecIn1, @@ -15854,77 +15853,6 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { return DAG.getBitcast(VT, BV); } -SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { - EVT VT = N->getValueType(0); - - unsigned NumInScalars = N->getNumOperands(); - SDLoc DL(N); - - EVT SrcVT = MVT::Other; - 
unsigned Opcode = ISD::DELETED_NODE; - unsigned NumDefs = 0; - - for (unsigned i = 0; i != NumInScalars; ++i) { - SDValue In = N->getOperand(i); - unsigned Opc = In.getOpcode(); - - if (Opc == ISD::UNDEF) - continue; - - // If all scalar values are floats and converted from integers. - if (Opcode == ISD::DELETED_NODE && - (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) { - Opcode = Opc; - } - - if (Opc != Opcode) - return SDValue(); - - EVT InVT = In.getOperand(0).getValueType(); - - // If all scalar values are typed differently, bail out. It's chosen to - // simplify BUILD_VECTOR of integer types. - if (SrcVT == MVT::Other) - SrcVT = InVT; - if (SrcVT != InVT) - return SDValue(); - NumDefs++; - } - - // If the vector has just one element defined, it's not worth to fold it into - // a vectorized one. - if (NumDefs < 2) - return SDValue(); - - assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP) - && "Should only handle conversion from integer to float."); - assert(SrcVT != MVT::Other && "Cannot determine source type!"); - - EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars); - - if (!TLI.isOperationLegalOrCustom(Opcode, NVT)) - return SDValue(); - - // Just because the floating-point vector type is legal does not necessarily - // mean that the corresponding integer vector type is. 
- if (!isTypeLegal(NVT)) - return SDValue(); - - SmallVector Opnds; - for (unsigned i = 0; i != NumInScalars; ++i) { - SDValue In = N->getOperand(i); - - if (In.isUndef()) - Opnds.push_back(DAG.getUNDEF(SrcVT)); - else - Opnds.push_back(In.getOperand(0)); - } - SDValue BV = DAG.getBuildVector(NVT, DL, Opnds); - AddToWorklist(BV.getNode()); - - return DAG.getNode(Opcode, DL, VT, BV); -} - SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef VectorMask, SDValue VecIn1, SDValue VecIn2, @@ -16371,9 +16299,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; - if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) - return V; - if (SDValue V = reduceBuildVecToShuffle(N)) return V; diff --git a/llvm/test/CodeGen/ARM/vdup.ll b/llvm/test/CodeGen/ARM/vdup.ll index c16a2a9..5127dab 100644 --- a/llvm/test/CodeGen/ARM/vdup.ll +++ b/llvm/test/CodeGen/ARM/vdup.ll @@ -488,11 +488,12 @@ define <2 x float> @check_spr_splat2(<2 x float> %p, i16 %q) { ; CHECK-LABEL: check_spr_splat2: ; CHECK: @ %bb.0: ; CHECK-NEXT: lsl r2, r2, #16 -; CHECK-NEXT: vmov d17, r0, r1 +; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: asr r2, r2, #16 -; CHECK-NEXT: vdup.32 d16, r2 -; CHECK-NEXT: vcvt.f32.s32 d16, d16 -; CHECK-NEXT: vsub.f32 d16, d16, d17 +; CHECK-NEXT: vmov s0, r2 +; CHECK-NEXT: vcvt.f32.s32 s0, s0 +; CHECK-NEXT: vdup.32 d17, d0[0] +; CHECK-NEXT: vsub.f32 d16, d17, d16 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: mov pc, lr %conv = sitofp i16 %q to float @@ -505,13 +506,13 @@ define <2 x float> @check_spr_splat2(<2 x float> %p, i16 %q) { define <4 x float> @check_spr_splat4(<4 x float> %p, i16 %q) { ; CHECK-LABEL: check_spr_splat4: ; CHECK: @ %bb.0: -; CHECK-NEXT: mov r12, sp -; CHECK-NEXT: vmov d19, r2, r3 -; CHECK-NEXT: vld1.16 {d16[]}, [r12:16] -; CHECK-NEXT: vmov d18, r0, r1 -; CHECK-NEXT: vmovl.s16 q8, d16 -; CHECK-NEXT: vcvt.f32.s32 q8, q8 -; CHECK-NEXT: vsub.f32 q8, q8, q9 +; CHECK-NEXT: ldrsh 
r12, [sp] +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vmov s0, r12 +; CHECK-NEXT: vcvt.f32.s32 s0, s0 +; CHECK-NEXT: vdup.32 q9, d0[0] +; CHECK-NEXT: vsub.f32 q8, q9, q8 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: mov pc, lr @@ -525,13 +526,13 @@ define <4 x float> @check_spr_splat4(<4 x float> %p, i16 %q) { define <4 x float> @check_spr_splat4_lane1(<4 x float> %p, i16 %q) { ; CHECK-LABEL: check_spr_splat4_lane1: ; CHECK: @ %bb.0: -; CHECK-NEXT: mov r12, sp -; CHECK-NEXT: vmov d19, r2, r3 -; CHECK-NEXT: vld1.16 {d16[]}, [r12:16] -; CHECK-NEXT: vmov d18, r0, r1 -; CHECK-NEXT: vmovl.s16 q8, d16 -; CHECK-NEXT: vcvt.f32.s32 q8, q8 -; CHECK-NEXT: vsub.f32 q8, q8, q9 +; CHECK-NEXT: ldrsh r12, [sp] +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vmov s0, r12 +; CHECK-NEXT: vcvt.f32.s32 s0, s0 +; CHECK-NEXT: vdup.32 q9, d0[0] +; CHECK-NEXT: vsub.f32 q8, q9, q8 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: mov pc, lr diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll index d6e2607..9a55285 100644 --- a/llvm/test/CodeGen/Mips/cconv/vector.ll +++ b/llvm/test/CodeGen/Mips/cconv/vector.ll @@ -6181,14 +6181,15 @@ define float @mixed_i8(<2 x float> %a, i8 %b, <2 x float> %c) { ; MIPS32R5-NEXT: addiu $1, $zero, -16 ; MIPS32R5-NEXT: and $sp, $sp, $1 ; MIPS32R5-NEXT: andi $1, $6, 255 -; MIPS32R5-NEXT: sw $1, 36($sp) -; MIPS32R5-NEXT: sw $1, 32($sp) +; MIPS32R5-NEXT: mtc1 $1, $f0 +; MIPS32R5-NEXT: cvt.s.w $f0, $f0 +; MIPS32R5-NEXT: swc1 $f0, 36($sp) +; MIPS32R5-NEXT: swc1 $f0, 32($sp) ; MIPS32R5-NEXT: sw $5, 4($sp) ; MIPS32R5-NEXT: sw $4, 0($sp) -; MIPS32R5-NEXT: ld.w $w0, 32($sp) -; MIPS32R5-NEXT: ffint_s.w $w0, $w0 -; MIPS32R5-NEXT: ld.w $w1, 0($sp) -; MIPS32R5-NEXT: fadd.w $w0, $w0, $w1 +; MIPS32R5-NEXT: ld.w $w0, 0($sp) +; MIPS32R5-NEXT: ld.w $w1, 32($sp) +; MIPS32R5-NEXT: fadd.w $w0, $w1, $w0 ; MIPS32R5-NEXT: 
lw $1, 84($fp) ; MIPS32R5-NEXT: sw $1, 20($sp) ; MIPS32R5-NEXT: lw $1, 80($fp) @@ -6209,13 +6210,14 @@ define float @mixed_i8(<2 x float> %a, i8 %b, <2 x float> %c) { ; MIPS64R5-NEXT: .cfi_def_cfa_offset 48 ; MIPS64R5-NEXT: sll $1, $5, 0 ; MIPS64R5-NEXT: andi $1, $1, 255 -; MIPS64R5-NEXT: sw $1, 36($sp) -; MIPS64R5-NEXT: sw $1, 32($sp) +; MIPS64R5-NEXT: mtc1 $1, $f0 +; MIPS64R5-NEXT: cvt.s.w $f0, $f0 +; MIPS64R5-NEXT: swc1 $f0, 36($sp) +; MIPS64R5-NEXT: swc1 $f0, 32($sp) ; MIPS64R5-NEXT: sd $4, 0($sp) -; MIPS64R5-NEXT: ld.w $w0, 32($sp) -; MIPS64R5-NEXT: ffint_s.w $w0, $w0 -; MIPS64R5-NEXT: ld.w $w1, 0($sp) -; MIPS64R5-NEXT: fadd.w $w0, $w0, $w1 +; MIPS64R5-NEXT: ld.w $w0, 0($sp) +; MIPS64R5-NEXT: ld.w $w1, 32($sp) +; MIPS64R5-NEXT: fadd.w $w0, $w1, $w0 ; MIPS64R5-NEXT: sd $6, 16($sp) ; MIPS64R5-NEXT: ld.w $w1, 16($sp) ; MIPS64R5-NEXT: fadd.w $w0, $w0, $w1 @@ -6337,36 +6339,59 @@ define <4 x float> @mixed_32(<4 x float> %a, i32 %b) { ; MIPS64EB-NEXT: jr $ra ; MIPS64EB-NEXT: nop ; -; MIPS32R5-LABEL: mixed_32: -; MIPS32R5: # %bb.0: # %entry -; MIPS32R5-NEXT: ldi.b $w0, 0 -; MIPS32R5-NEXT: insert.w $w0[0], $6 -; MIPS32R5-NEXT: insert.w $w0[1], $7 -; MIPS32R5-NEXT: lw $1, 16($sp) -; MIPS32R5-NEXT: insert.w $w0[2], $1 -; MIPS32R5-NEXT: lw $1, 20($sp) -; MIPS32R5-NEXT: insert.w $w0[3], $1 -; MIPS32R5-NEXT: lw $1, 24($sp) -; MIPS32R5-NEXT: fill.w $w1, $1 -; MIPS32R5-NEXT: ffint_u.w $w1, $w1 -; MIPS32R5-NEXT: fadd.w $w0, $w1, $w0 -; MIPS32R5-NEXT: st.w $w0, 0($4) -; MIPS32R5-NEXT: jr $ra -; MIPS32R5-NEXT: nop +; MIPS32R5EB-LABEL: mixed_32: +; MIPS32R5EB: # %bb.0: # %entry +; MIPS32R5EB-NEXT: addiu $sp, $sp, -8 +; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 8 +; MIPS32R5EB-NEXT: lui $1, 17200 +; MIPS32R5EB-NEXT: sw $1, 0($sp) +; MIPS32R5EB-NEXT: lw $1, 32($sp) +; MIPS32R5EB-NEXT: sw $1, 4($sp) +; MIPS32R5EB-NEXT: lui $1, %hi($CPI41_0) +; MIPS32R5EB-NEXT: ldc1 $f0, %lo($CPI41_0)($1) +; MIPS32R5EB-NEXT: ldc1 $f1, 0($sp) +; MIPS32R5EB-NEXT: sub.d $f0, $f1, $f0 +; MIPS32R5EB-NEXT: 
cvt.s.d $f0, $f0 +; MIPS32R5EB-NEXT: ldi.b $w1, 0 +; MIPS32R5EB-NEXT: splati.w $w0, $w0[0] +; MIPS32R5EB-NEXT: insert.w $w1[0], $6 +; MIPS32R5EB-NEXT: insert.w $w1[1], $7 +; MIPS32R5EB-NEXT: lw $1, 24($sp) +; MIPS32R5EB-NEXT: insert.w $w1[2], $1 +; MIPS32R5EB-NEXT: lw $1, 28($sp) +; MIPS32R5EB-NEXT: insert.w $w1[3], $1 +; MIPS32R5EB-NEXT: fadd.w $w0, $w0, $w1 +; MIPS32R5EB-NEXT: st.w $w0, 0($4) +; MIPS32R5EB-NEXT: addiu $sp, $sp, 8 +; MIPS32R5EB-NEXT: jr $ra +; MIPS32R5EB-NEXT: nop ; ; MIPS64R5EB-LABEL: mixed_32: ; MIPS64R5EB: # %bb.0: # %entry -; MIPS64R5EB-NEXT: ldi.b $w0, 0 -; MIPS64R5EB-NEXT: insert.d $w0[0], $4 -; MIPS64R5EB-NEXT: insert.d $w0[1], $5 -; MIPS64R5EB-NEXT: shf.w $w0, $w0, 177 -; MIPS64R5EB-NEXT: sll $1, $6, 0 -; MIPS64R5EB-NEXT: fill.w $w1, $1 -; MIPS64R5EB-NEXT: ffint_u.w $w1, $w1 -; MIPS64R5EB-NEXT: fadd.w $w0, $w1, $w0 +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(mixed_32))) +; MIPS64R5EB-NEXT: daddu $1, $1, $25 +; MIPS64R5EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(mixed_32))) +; MIPS64R5EB-NEXT: lui $2, 17200 +; MIPS64R5EB-NEXT: sw $2, 8($sp) +; MIPS64R5EB-NEXT: sll $2, $6, 0 +; MIPS64R5EB-NEXT: sw $2, 12($sp) +; MIPS64R5EB-NEXT: ld $1, %got_page(.LCPI41_0)($1) +; MIPS64R5EB-NEXT: ldc1 $f0, %got_ofst(.LCPI41_0)($1) +; MIPS64R5EB-NEXT: ldc1 $f1, 8($sp) +; MIPS64R5EB-NEXT: sub.d $f0, $f1, $f0 +; MIPS64R5EB-NEXT: ldi.b $w1, 0 +; MIPS64R5EB-NEXT: insert.d $w1[0], $4 +; MIPS64R5EB-NEXT: insert.d $w1[1], $5 +; MIPS64R5EB-NEXT: shf.w $w1, $w1, 177 +; MIPS64R5EB-NEXT: cvt.s.d $f0, $f0 +; MIPS64R5EB-NEXT: splati.w $w0, $w0[0] +; MIPS64R5EB-NEXT: fadd.w $w0, $w0, $w1 ; MIPS64R5EB-NEXT: shf.w $w0, $w0, 177 ; MIPS64R5EB-NEXT: copy_s.d $2, $w0[0] ; MIPS64R5EB-NEXT: copy_s.d $3, $w0[1] +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EB-NEXT: jr $ra ; MIPS64R5EB-NEXT: nop ; @@ -6445,17 +6470,57 @@ define <4 x float> @mixed_32(<4 x float> %a, i32 %b) { ; MIPS64EL-NEXT: jr $ra 
; MIPS64EL-NEXT: nop ; +; MIPS32R5EL-LABEL: mixed_32: +; MIPS32R5EL: # %bb.0: # %entry +; MIPS32R5EL-NEXT: addiu $sp, $sp, -8 +; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 8 +; MIPS32R5EL-NEXT: lui $1, 17200 +; MIPS32R5EL-NEXT: sw $1, 4($sp) +; MIPS32R5EL-NEXT: lw $1, 32($sp) +; MIPS32R5EL-NEXT: sw $1, 0($sp) +; MIPS32R5EL-NEXT: lui $1, %hi($CPI41_0) +; MIPS32R5EL-NEXT: ldc1 $f0, %lo($CPI41_0)($1) +; MIPS32R5EL-NEXT: ldc1 $f1, 0($sp) +; MIPS32R5EL-NEXT: sub.d $f0, $f1, $f0 +; MIPS32R5EL-NEXT: cvt.s.d $f0, $f0 +; MIPS32R5EL-NEXT: ldi.b $w1, 0 +; MIPS32R5EL-NEXT: splati.w $w0, $w0[0] +; MIPS32R5EL-NEXT: insert.w $w1[0], $6 +; MIPS32R5EL-NEXT: insert.w $w1[1], $7 +; MIPS32R5EL-NEXT: lw $1, 24($sp) +; MIPS32R5EL-NEXT: insert.w $w1[2], $1 +; MIPS32R5EL-NEXT: lw $1, 28($sp) +; MIPS32R5EL-NEXT: insert.w $w1[3], $1 +; MIPS32R5EL-NEXT: fadd.w $w0, $w0, $w1 +; MIPS32R5EL-NEXT: st.w $w0, 0($4) +; MIPS32R5EL-NEXT: addiu $sp, $sp, 8 +; MIPS32R5EL-NEXT: jr $ra +; MIPS32R5EL-NEXT: nop +; ; MIPS64R5EL-LABEL: mixed_32: ; MIPS64R5EL: # %bb.0: # %entry -; MIPS64R5EL-NEXT: ldi.b $w0, 0 -; MIPS64R5EL-NEXT: insert.d $w0[0], $4 -; MIPS64R5EL-NEXT: insert.d $w0[1], $5 -; MIPS64R5EL-NEXT: sll $1, $6, 0 -; MIPS64R5EL-NEXT: fill.w $w1, $1 -; MIPS64R5EL-NEXT: ffint_u.w $w1, $w1 -; MIPS64R5EL-NEXT: fadd.w $w0, $w1, $w0 +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -16 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R5EL-NEXT: lui $1, %hi(%neg(%gp_rel(mixed_32))) +; MIPS64R5EL-NEXT: daddu $1, $1, $25 +; MIPS64R5EL-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(mixed_32))) +; MIPS64R5EL-NEXT: lui $2, 17200 +; MIPS64R5EL-NEXT: sw $2, 12($sp) +; MIPS64R5EL-NEXT: sll $2, $6, 0 +; MIPS64R5EL-NEXT: sw $2, 8($sp) +; MIPS64R5EL-NEXT: ld $1, %got_page(.LCPI41_0)($1) +; MIPS64R5EL-NEXT: ldc1 $f0, %got_ofst(.LCPI41_0)($1) +; MIPS64R5EL-NEXT: ldc1 $f1, 8($sp) +; MIPS64R5EL-NEXT: sub.d $f0, $f1, $f0 +; MIPS64R5EL-NEXT: ldi.b $w1, 0 +; MIPS64R5EL-NEXT: insert.d $w1[0], $4 +; MIPS64R5EL-NEXT: insert.d $w1[1], $5 +; 
MIPS64R5EL-NEXT: cvt.s.d $f0, $f0 +; MIPS64R5EL-NEXT: splati.w $w0, $w0[0] +; MIPS64R5EL-NEXT: fadd.w $w0, $w0, $w1 ; MIPS64R5EL-NEXT: copy_s.d $2, $w0[0] ; MIPS64R5EL-NEXT: copy_s.d $3, $w0[1] +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64R5EL-NEXT: jr $ra ; MIPS64R5EL-NEXT: nop entry: diff --git a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index 945f473..0cce34f 100644 --- a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; REQUIRES: asserts -; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "9 machinelicm" +; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "6 machinelicm" ; RUN: llc < %s -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s ; rdar://6627786 ; rdar://7792037 @@ -24,15 +24,17 @@ define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp { ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_3: ## %bb.i ; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: pinsrd $1, 4, %xmm0 -; CHECK-NEXT: pinsrd $2, 8, %xmm0 -; CHECK-NEXT: movdqa %xmm0, %xmm1 -; CHECK-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2],mem[3],xmm1[4],mem[5],xmm1[6],mem[7] -; CHECK-NEXT: psrld $16, %xmm0 -; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; CHECK-NEXT: addps {{.*}}(%rip), %xmm0 -; CHECK-NEXT: addps %xmm1, %xmm0 +; CHECK-NEXT: movl 0, %eax +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: cvtsi2ssq %rax, %xmm0 +; CHECK-NEXT: movl 4, %eax +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: cvtsi2ssq %rax, %xmm1 +; CHECK-NEXT: movl 8, %eax +; CHECK-NEXT: xorps %xmm2, %xmm2 +; CHECK-NEXT: cvtsi2ssq %rax, %xmm2 +; CHECK-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; CHECK-NEXT: insertps {{.*#+}} 
xmm0 = xmm0[0,1],xmm2[0],xmm0[3] ; CHECK-NEXT: movaps %xmm0, 0 ; CHECK-NEXT: LBB0_1: ## %bb4 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/X86/cvtv2f32.ll b/llvm/test/CodeGen/X86/cvtv2f32.ll index cda0047..c755d5f 100644 --- a/llvm/test/CodeGen/X86/cvtv2f32.ll +++ b/llvm/test/CodeGen/X86/cvtv2f32.ll @@ -8,26 +8,27 @@ define <2 x float> @uitofp_2i32_cvt_buildvector(i32 %x, i32 %y, <2 x float> %v) { ; X32-LABEL: uitofp_2i32_cvt_buildvector: ; X32: # %bb.0: -; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; X32-NEXT: movdqa {{.*#+}} xmm2 = [1258291200,1258291200,1258291200,1258291200] -; X32-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] -; X32-NEXT: psrld $16, %xmm1 -; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2],mem[3],xmm1[4],mem[5],xmm1[6],mem[7] -; X32-NEXT: addps {{\.LCPI.*}}, %xmm1 -; X32-NEXT: addps %xmm2, %xmm1 +; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; X32-NEXT: orpd %xmm2, %xmm1 +; X32-NEXT: subsd %xmm2, %xmm1 +; X32-NEXT: cvtsd2ss %xmm1, %xmm1 +; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X32-NEXT: orpd %xmm2, %xmm3 +; X32-NEXT: subsd %xmm2, %xmm3 +; X32-NEXT: xorps %xmm2, %xmm2 +; X32-NEXT: cvtsd2ss %xmm3, %xmm2 +; X32-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] ; X32-NEXT: mulps %xmm1, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: uitofp_2i32_cvt_buildvector: ; X64: # %bb.0: -; X64-NEXT: movd %edi, %xmm1 -; X64-NEXT: pinsrd $1, %esi, %xmm1 -; X64-NEXT: movdqa {{.*#+}} xmm2 = [1258291200,1258291200,1258291200,1258291200] -; X64-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] -; X64-NEXT: psrld $16, %xmm1 -; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2],mem[3],xmm1[4],mem[5],xmm1[6],mem[7] -; X64-NEXT: addps {{.*}}(%rip), %xmm1 -; X64-NEXT: addps %xmm2, %xmm1 +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cvtsi2ssq %rax, %xmm1 +; 
X64-NEXT: movl %esi, %eax +; X64-NEXT: cvtsi2ssq %rax, %xmm2 +; X64-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] ; X64-NEXT: mulps %xmm1, %xmm0 ; X64-NEXT: retq %t1 = uitofp i32 %x to float -- 2.7.4