From e165bc263132d3445f69a31e95a7b82336ec7a1b Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 28 Jun 2023 17:16:34 +0100 Subject: [PATCH] [SLP][AArch64] Extend extracts-from-scalarizable-vector.ll test for cmp cost testing. NFC See D153507. The existing test is over-simplified; as written, it should have been simplified prior to SLP vectorization. I have left it as-is to ensure the crash it was protecting against doesn't arise again. A new test with valid inputs is also added to show the incorrect costs of alt cmp vectorization. --- .../AArch64/extracts-from-scalarizable-vector.ll | 29 ++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll index 1ace27f..ac4219e4 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s -define i1 @test() { -; CHECK-LABEL: define i1 @test() { +define i1 @degenerate() { +; CHECK-LABEL: define i1 @degenerate() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP0]]) @@ -20,3 +20,28 @@ entry: %or.cond30 = select i1 %or.cond29, i1 %cmp10, i1 false ret i1 %or.cond30 } + +define i1 @with_inputs(<4 x fp128> %a) { +; CHECK-LABEL: define i1 @with_inputs +; CHECK-SAME: (<4 x fp128> [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x fp128> [[A]], <4 x fp128> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x fp128> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt 
<4 x fp128> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]]) +; CHECK-NEXT: ret i1 [[TMP5]] +; +entry: + %0 = extractelement <4 x fp128> %a, i32 0 + %cmp = fcmp ogt fp128 %0, 0xL00000000000000000000000000000000 + %cmp3 = fcmp olt fp128 %0, 0xL00000000000000000000000000000000 + %or.cond = and i1 %cmp, %cmp3 + %1 = extractelement <4 x fp128> %a, i32 1 + %cmp6 = fcmp ogt fp128 %1, 0xL00000000000000000000000000000000 + %or.cond29 = select i1 %or.cond, i1 %cmp6, i1 false + %cmp10 = fcmp olt fp128 %1, 0xL00000000000000000000000000000000 + %or.cond30 = select i1 %or.cond29, i1 %cmp10, i1 false + ret i1 %or.cond30 +} -- 2.7.4