From 6a0dca535af9b3515ffd22c027937b3f3f908ff3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 20 Feb 2016 22:41:31 +0000 Subject: [PATCH] [InstCombine] Added SSE/SSE2 comparison intrinsics demanded vector elements tests llvm-svn: 261454 --- llvm/test/Transforms/InstCombine/x86-sse.ll | 308 ++++++++++++++++++++++++++- llvm/test/Transforms/InstCombine/x86-sse2.ll | 206 ++++++++++++++++++ 2 files changed, 511 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/x86-sse.ll b/llvm/test/Transforms/InstCombine/x86-sse.ll index f0cdf0b..d6b4796 100644 --- a/llvm/test/Transforms/InstCombine/x86-sse.ll +++ b/llvm/test/Transforms/InstCombine/x86-sse.ll @@ -207,9 +207,297 @@ define float @test_cmp_ss_0(float %a, float %b) { ret float %r } -declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone -declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone -declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone +define i32 @test_comieq_ss_0(float %a, float %b) { +; CHECK-LABEL: @test_comieq_ss_0 +; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 +; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 +; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 +; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 +; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 +; CHECK-NEXT: %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8) +; CHECK-NEXT: ret i32 %9 + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8) + ret i32 %9 +} + +define i32 @test_comige_ss_0(float %a, float %b) { +; CHECK-LABEL: @test_comige_ss_0 +; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 +; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 +; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 +; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 +; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 +; CHECK-NEXT: %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8) +; CHECK-NEXT: ret i32 %9 + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8) + ret i32 %9 +} + +define i32 @test_comigt_ss_0(float %a, float %b) { +; CHECK-LABEL: @test_comigt_ss_0 +; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 +; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 +; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 +; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 +; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 +; CHECK-NEXT: %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8) +; CHECK-NEXT: ret i32 %9 + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8) + ret i32 %9 +} + +define i32 @test_comile_ss_0(float %a, float %b) { +; CHECK-LABEL: @test_comile_ss_0 +; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 +; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 +; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 +; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 +; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 +; CHECK-NEXT: %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8) +; CHECK-NEXT: ret i32 %9 + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8) + ret i32 %9 +} + +define i32 @test_comilt_ss_0(float %a, float %b) { +; CHECK-LABEL: @test_comilt_ss_0 +; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 +; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 +; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 +; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 +; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 +; CHECK-NEXT: %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8) +; CHECK-NEXT: ret i32 %9 + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8) + ret i32 %9 +} + +define i32 @test_comineq_ss_0(float %a, float %b) { +; CHECK-LABEL: @test_comineq_ss_0 +; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 +; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 +; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 +; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 +; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 +; CHECK-NEXT: %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8) +; CHECK-NEXT: ret i32 %9 + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8) + ret i32 %9 +} + +define i32 @test_ucomieq_ss_0(float %a, float %b) { +; CHECK-LABEL: @test_ucomieq_ss_0 +; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 +; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 +; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 +; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 +; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 +; CHECK-NEXT: %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8) +; CHECK-NEXT: ret i32 %9 + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8) + ret i32 %9 +} + +define i32 @test_ucomige_ss_0(float %a, float %b) { +; CHECK-LABEL: @test_ucomige_ss_0 +; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 +; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 +; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 +; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 +; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 +; CHECK-NEXT: %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8) +; CHECK-NEXT: ret i32 %9 + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8) + ret i32 %9 +} + +define i32 @test_ucomigt_ss_0(float %a, float %b) { +; CHECK-LABEL: @test_ucomigt_ss_0 +; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 +; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 +; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 +; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 +; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 +; CHECK-NEXT: %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8) +; CHECK-NEXT: ret i32 %9 + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8) + ret i32 %9 +} + +define i32 @test_ucomile_ss_0(float %a, float %b) { +; CHECK-LABEL: @test_ucomile_ss_0 +; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 +; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 +; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 +; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 +; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 +; CHECK-NEXT: %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8) +; CHECK-NEXT: ret i32 %9 + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8) + ret i32 %9 +} + +define i32 @test_ucomilt_ss_0(float %a, float %b) { +; CHECK-LABEL: @test_ucomilt_ss_0 +; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 +; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 +; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 +; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 +; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 +; CHECK-NEXT: %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8) +; CHECK-NEXT: ret i32 %9 + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8) + ret i32 %9 +} + +define i32 @test_ucomineq_ss_0(float %a, float %b) { +; CHECK-LABEL: @test_ucomineq_ss_0 +; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 +; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 +; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 +; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 +; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 +; CHECK-NEXT: %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8) +; CHECK-NEXT: ret i32 %9 + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 + %5 = insertelement <4 x float> undef, float %b, i32 0 + %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 + %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 + %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 + %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8) + ret i32 %9 +} + +declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) +declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) +declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) @@ -218,3 +506,17 @@ declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) + +declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) +declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) +declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) +declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) +declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) +declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) + +declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) +declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) +declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) +declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) +declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) +declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) diff --git a/llvm/test/Transforms/InstCombine/x86-sse2.ll b/llvm/test/Transforms/InstCombine/x86-sse2.ll index 1b02a7c..0fb6af4 100644 --- a/llvm/test/Transforms/InstCombine/x86-sse2.ll +++ b/llvm/test/Transforms/InstCombine/x86-sse2.ll @@ -127,6 +127,198 @@ define double @test_cmp_sd_0(double %a, double %b) { ret double %6 } +define i32 @test_comieq_sd_0(double %a, double %b) { +; CHECK-LABEL: @test_comieq_sd_0 +; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 +; CHECK-NEXT: %5 = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %2, <2 x double> %4) +; CHECK-NEXT: ret i32 %5 + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %2, <2 x double> %4) + ret i32 %5 +} + +define i32 @test_comige_sd_0(double %a, double %b) { +; CHECK-LABEL: @test_comige_sd_0 +; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 +; CHECK-NEXT: %5 = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> %2, <2 x double> %4) +; CHECK-NEXT: ret i32 %5 + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> %2, <2 x double> %4) + ret i32 %5 +} + +define i32 @test_comigt_sd_0(double %a, double %b) { +; CHECK-LABEL: @test_comigt_sd_0 +; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 +; CHECK-NEXT: %5 = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %2, <2 x double> %4) +; CHECK-NEXT: ret i32 %5 + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %2, <2 x double> %4) + ret i32 %5 +} + +define i32 @test_comile_sd_0(double %a, double %b) { +; CHECK-LABEL: @test_comile_sd_0 +; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 +; CHECK-NEXT: %5 = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> %2, <2 x double> %4) +; CHECK-NEXT: ret i32 %5 + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> %2, <2 x double> %4) + ret i32 %5 +} + +define i32 @test_comilt_sd_0(double %a, double %b) { +; CHECK-LABEL: @test_comilt_sd_0 +; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 +; CHECK-NEXT: %5 = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %2, <2 x double> %4) +; CHECK-NEXT: ret i32 %5 + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %2, <2 x double> %4) + ret i32 %5 +} + +define i32 @test_comineq_sd_0(double %a, double %b) { +; CHECK-LABEL: @test_comineq_sd_0 +; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 +; CHECK-NEXT: %5 = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %2, <2 x double> %4) +; CHECK-NEXT: ret i32 %5 + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %2, <2 x double> %4) + ret i32 %5 +} + +define i32 @test_ucomieq_sd_0(double %a, double %b) { +; CHECK-LABEL: @test_ucomieq_sd_0 +; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 +; CHECK-NEXT: %5 = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %2, <2 x double> %4) +; CHECK-NEXT: ret i32 %5 + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %2, <2 x double> %4) + ret i32 %5 +} + +define i32 @test_ucomige_sd_0(double %a, double %b) { +; CHECK-LABEL: @test_ucomige_sd_0 +; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 +; CHECK-NEXT: %5 = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %2, <2 x double> %4) +; CHECK-NEXT: ret i32 %5 + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %2, <2 x double> %4) + ret i32 %5 +} + +define i32 @test_ucomigt_sd_0(double %a, double %b) { +; CHECK-LABEL: @test_ucomigt_sd_0 +; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 +; CHECK-NEXT: %5 = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %2, <2 x double> %4) +; CHECK-NEXT: ret i32 %5 + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %2, <2 x double> %4) + ret i32 %5 +} + +define i32 @test_ucomile_sd_0(double %a, double %b) { +; CHECK-LABEL: @test_ucomile_sd_0 +; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 +; CHECK-NEXT: %5 = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %2, <2 x double> %4) +; CHECK-NEXT: ret i32 %5 + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %2, <2 x double> %4) + ret i32 %5 +} + +define i32 @test_ucomilt_sd_0(double %a, double %b) { +; CHECK-LABEL: @test_ucomilt_sd_0 +; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 +; CHECK-NEXT: %5 = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %2, <2 x double> %4) +; CHECK-NEXT: ret i32 %5 + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %2, <2 x double> %4) + ret i32 %5 +} + +define i32 @test_ucomineq_sd_0(double %a, double %b) { +; CHECK-LABEL: @test_ucomineq_sd_0 +; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 +; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 +; CHECK-NEXT: %5 = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %2, <2 x double> %4) +; CHECK-NEXT: ret i32 %5 + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 + %3 = insertelement <2 x double> undef, double %b, i32 0 + %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 + %5 = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %2, <2 x double> %4) + ret i32 %5 +} + declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) @@ -136,3 +328,17 @@ declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) + +declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) +declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) +declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) +declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) +declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) +declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) + +declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) +declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) +declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) +declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) +declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) +declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) -- 2.7.4