From 7fc6d34ed1fce99505713c5b09b3701aaac9d60e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 11 Dec 2016 22:32:38 +0000 Subject: [PATCH] [InstCombine][XOP] The instructions for the scalar frcz intrinsics are defined to put 0 in the upper bits, not pass bits through like other intrinsics. So we should return a zero vector instead. llvm-svn: 289411 --- .../InstCombine/InstCombineSimplifyDemanded.cpp | 16 ++++++++++++++-- llvm/test/Transforms/InstCombine/x86-xop.ll | 4 ++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 839eff7..abda6a3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1255,13 +1255,25 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, switch (II->getIntrinsicID()) { default: break; + case Intrinsic::x86_xop_vfrcz_ss: + case Intrinsic::x86_xop_vfrcz_sd: + // The instructions for these intrinsics are specified to zero the upper + // bits, not pass them through like other scalar intrinsics. So we + // shouldn't just use Arg0 if DemandedElts[0] is clear like we do for + // other intrinsics. Instead we should return a zero vector. + if (!DemandedElts[0]) + return ConstantAggregateZero::get(II->getType()); + + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, + UndefElts, Depth + 1); + if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + break; + // Unary scalar-as-vector operations that work column-wise. 
case Intrinsic::x86_sse_rcp_ss: case Intrinsic::x86_sse_rsqrt_ss: case Intrinsic::x86_sse_sqrt_ss: case Intrinsic::x86_sse2_sqrt_sd: - case Intrinsic::x86_xop_vfrcz_ss: - case Intrinsic::x86_xop_vfrcz_sd: TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, UndefElts, Depth + 1); if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } diff --git a/llvm/test/Transforms/InstCombine/x86-xop.ll b/llvm/test/Transforms/InstCombine/x86-xop.ll index 015d511..d987c75 100644 --- a/llvm/test/Transforms/InstCombine/x86-xop.ll +++ b/llvm/test/Transforms/InstCombine/x86-xop.ll @@ -17,7 +17,7 @@ define double @test_vfrcz_sd_0(double %a) { define double @test_vfrcz_sd_1(double %a) { ; CHECK-LABEL: @test_vfrcz_sd_1( -; CHECK-NEXT: ret double 1.000000e+00 +; CHECK-NEXT: ret double 0.000000e+00 ; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 @@ -44,7 +44,7 @@ define float @test_vfrcz_ss_0(float %a) { define float @test_vfrcz_ss_3(float %a) { ; CHECK-LABEL: @test_vfrcz_ss_3( -; CHECK-NEXT: ret float 3.000000e+00 +; CHECK-NEXT: ret float 0.000000e+00 ; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 -- 2.7.4