From ae6e89327b04a94b6d1a2533c598ec6be60eb922 Mon Sep 17 00:00:00 2001
From: Juneyoung Lee
Date: Tue, 29 Dec 2020 17:02:25 +0900
Subject: [PATCH] Precommit tests that have poison as shufflevector's placeholder

This commit copies the existing tests under llvm/test/Transforms that contain
'shufflevector X, undef' and replaces those shuffles with
'shufflevector X, poison'. The newly copied tests have a *-inseltpoison.ll
suffix in their file names (as db7a2f347f132b3920415013d62d1adfb18d8d58 did).
See https://reviews.llvm.org/D93793

Test files were listed using

  grep -R -E "^[^;]*shufflevector <.*> .*, <.*> undef" | cut -d":" -f1 | uniq

Test files were copied & updated using

  file_org=llvm/test/Transforms/$1
  if [[ "$file_org" = *-inseltpoison.ll ]]; then
    file=$file_org
  else
    file=${file_org%.ll}-inseltpoison.ll
    if [ ! -f $file ]; then
      cp $file_org $file
    fi
  fi

  sed -i -E 's/^([^;]*)shufflevector <(.*)> (.*), <(.*)> undef/\1shufflevector <\2> \3, <\4> poison/g' $file

  head -1 $file | grep "Assertions have been autogenerated by utils/update_test_checks.py" -q
  if [ "$?" == 1 ]; then
    echo "$file : should be manually updated"
    # The test is manually updated
    exit 1
  fi

  python3 ./llvm/utils/update_test_checks.py --opt-binary=./build-releaseassert/bin/opt $file
---
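For reference, the sed command above rewrites only the trailing '<...> undef'
operand of a shufflevector, which is normally the unused placeholder operand.
A minimal before/after sketch on a hypothetical splat (the names %v and %splat
are illustrative and do not come from any test in this patch):

  ; before: the placeholder operand is undef
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  ; after: the placeholder operand is poison
  %splat = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> zeroinitializer

The zeroinitializer mask never selects a lane from the second operand, so the
rewrite does not change which values these shuffles produce, and the tests'
expected output should be unaffected except where noted by update_test_checks.py.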
 .../AArch64/gather-scatter-opt-inseltpoison.ll | 12 +-
 .../AArch64/sink-free-instructions-inseltpoison.ll | 274 ++++
 .../ARM/sink-add-mul-shufflevector-inseltpoison.ll | 30 +-
 .../ARM/sink-free-instructions-inseltpoison.ll | 232 ++++
 .../CodeGenPrepare/ARM/sinkchain-inseltpoison.ll | 10 +-
 .../X86/cgp_shuffle_crash-inseltpoison.ll | 14 +
 .../X86/gather-scatter-opt-inseltpoison.ll | 12 +-
 .../CodeGenPrepare/X86/vec-shift-inseltpoison.ll | 111 +-
 .../X86/x86-shuffle-sink-inseltpoison.ll | 50 +-
 .../masked-dead-store-inseltpoison.ll | 78 ++
 .../Inline/inlined-loop-metadata-inseltpoison.ll | 159 +++
 .../amdgcn-demanded-vector-elts-inseltpoison.ll | 250 ++--
 .../X86/shufflemask-undef-inseltpoison.ll | 110 ++
 .../InstCombine/X86/x86-addsub-inseltpoison.ll | 38 +-
 .../InstCombine/X86/x86-avx2-inseltpoison.ll | 110 ++
 .../InstCombine/X86/x86-f16c-inseltpoison.ll | 71 +
 .../InstCombine/X86/x86-muldq-inseltpoison.ll | 281 ++++
 .../InstCombine/X86/x86-pack-inseltpoison.ll | 60 +-
 .../InstCombine/X86/x86-pshufb-inseltpoison.ll | 515 +++++++
 .../InstCombine/X86/x86-sse4a-inseltpoison.ll | 420 ++++++
 .../X86/x86-vector-shifts-inseltpoison.ll | 70 +-
 .../InstCombine/X86/x86-vpermil-inseltpoison.ll | 301 ++++
 .../Transforms/InstCombine/assume-inseltpoison.ll | 656 +++++++++
 .../Transforms/InstCombine/bswap-inseltpoison.ll | 867 ++++++++++++
 .../InstCombine/extractelement-inseltpoison.ll | 2 +-
 .../Transforms/InstCombine/fmul-inseltpoison.ll | 1176 ++++++++++++++++
 .../InstCombine/icmp-bc-vec-inseltpoison.ll | 22 +-
 .../InstCombine/icmp-vec-inseltpoison.ll | 375 +++++
 .../insert-extract-shuffle-inseltpoison.ll | 62 +-
 .../InstCombine/logical-select-inseltpoison.ll | 637 +++++++++
 .../InstCombine/masked_intrinsics-inseltpoison.ll | 4 +-
 .../Transforms/InstCombine/mul-inseltpoison.ll | 1108 +++++++++++++++
 .../Transforms/InstCombine/nsw-inseltpoison.ll | 142 ++
 .../InstCombine/obfuscated_splat-inseltpoison.ll | 11 +
 .../InstCombine/pr2645-0-inseltpoison.ll | 34 +
 .../InstCombine/scalarization-inseltpoison.ll | 2 +-
 .../select-extractelement-inseltpoison.ll | 4 +-
 .../InstCombine/shift-add-inseltpoison.ll | 12 +-
 .../InstCombine/shuffle-cast-inseltpoison.ll | 123 ++
 .../shuffle-select-narrow-inseltpoison.ll | 144 ++
 .../InstCombine/shuffle_select-inseltpoison.ll | 1467 ++++++++++++++++++++
 .../InstCombine/shufflevec-bitcast-inseltpoison.ll | 169 +++
 .../shufflevec-constant-inseltpoison.ll | 17 +
 .../shufflevector-div-rem-inseltpoison.ll | 22 +-
 .../InstCombine/sub-of-negatible-inseltpoison.ll | 1406 +++++++++++++++++++
 .../trunc-extractelement-inseltpoison.ll | 4 +-
 .../Transforms/InstCombine/trunc-inseltpoison.ll | 1023 ++++++++++++++
 .../InstCombine/type_pun-inseltpoison.ll | 155 +++
 .../InstCombine/vec-binop-select-inseltpoison.ll | 287 ++++
 .../InstCombine/vec_demanded_elts-inseltpoison.ll | 160 +--
 .../InstCombine/vec_gep_scalar_arg-inseltpoison.ll | 2 +-
 .../InstCombine/vec_phi_extract-inseltpoison.ll | 6 +-
 .../InstCombine/vec_shuffle-inseltpoison.ll | 414 +++---
 .../vector-concat-binop-inseltpoison.ll | 282 ++++
 .../InstCombine/vector_gep1-inseltpoison.ll | 4 +-
 .../vscale_extractelement-inseltpoison.ll | 16 +-
 .../vscale_insertelement-inseltpoison.ll | 4 +-
 .../ConstProp/vector-undef-elts-inseltpoison.ll | 69 +
 .../InstSimplify/ConstProp/vscale-inseltpoison.ll | 8 +-
 .../InstSimplify/shufflevector-inseltpoison.ll | 286 ++++
 .../Transforms/InstSimplify/vscale-inseltpoison.ll | 8 +-
 .../AArch64/binopshuffles-inseltpoison.ll | 151 ++
 ...terleaved-accesses-extract-user-inseltpoison.ll | 113 ++
 .../AArch64/interleaved-accesses-inseltpoison.ll | 801 +++++++++++
 ...terleaved-accesses-extract-user-inseltpoison.ll | 113 ++
 .../ARM/interleaved-accesses-inseltpoison.ll | 1432 +++++++++++++++++++
 ...interleaved-accesses-64bits-avx-inseltpoison.ll | 243 ++++
 .../X86/interleavedLoad-inseltpoison.ll | 158 +++
 .../X86/interleavedStore-inseltpoison.ll | 243 ++++
 .../LoopSimplify/do-preheader-dbg-inseltpoison.ll | 122 ++
 .../AMDGPU/lsr-void-inseltpoison.ll | 37 +
 .../ARM/vctp-chains-inseltpoison.ll | 16 +-
 .../p8-unrolling-legalize-vectors-inseltpoison.ll | 4 +-
 .../LoopUnroll/X86/pr46430-inseltpoison.ll | 23 +
 .../X86/scalarization-inseltpoison.ll | 16 +-
 .../PhaseOrdering/X86/shuffle-inseltpoison.ll | 319 +++++
 .../Transforms/Scalarizer/basic-inseltpoison.ll | 2 +-
 .../Scalarizer/dbgloc-bug-inseltpoison.ll | 2 +-
 .../Scalarizer/order-bug-inseltpoison.ll | 2 +-
 .../Transforms/Scalarizer/phi-bug-inseltpoison.ll | 25 +
 .../spec-other-inseltpoison.ll | 2 +-
 .../AArch64/vscale-bitcast-shuffle-inseltpoison.ll | 21 +
 .../VectorCombine/X86/no-sse-inseltpoison.ll | 15 +
 .../VectorCombine/X86/shuffle-inseltpoison.ll | 152 ++
 84 files changed, 17691 insertions(+), 719 deletions(-)
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/cgp_shuffle_crash-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/DeadStoreElimination/masked-dead-store-inseltpoison.ll
 create mode 100755 llvm/test/Transforms/Inline/inlined-loop-metadata-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/X86/shufflemask-undef-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/X86/x86-avx2-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/X86/x86-f16c-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/X86/x86-sse4a-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/assume-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/bswap-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/obfuscated_splat-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/pr2645-0-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/shuffle-cast-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/shuffle-select-narrow-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/shufflevec-bitcast-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/shufflevec-constant-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/type_pun-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/vec-binop-select-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstCombine/vector-concat-binop-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstSimplify/ConstProp/vector-undef-elts-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-extract-user-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-extract-user-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InterleavedAccess/X86/interleavedLoad-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/InterleavedAccess/X86/interleavedStore-inseltpoison.ll
 create mode 100755 llvm/test/Transforms/LoopSimplify/do-preheader-dbg-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-void-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/LoopUnroll/X86/pr46430-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/Scalarizer/phi-bug-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/VectorCombine/AArch64/vscale-bitcast-shuffle-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/VectorCombine/X86/no-sse-inseltpoison.ll
 create mode 100644 llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll

diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll
index 5611ac7..d340816 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/gather-scatter-opt-inseltpoison.ll
@@ -14,7 +14,7 @@ define @splat_base(i32* %base,
%index, [[RES]] ; %broadcast.splatinsert = insertelement poison, i32* %base, i32 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, undef, zeroinitializer + %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer %gep = getelementptr i32, %broadcast.splat, %index %res = call @llvm.masked.gather.nxv4i32.nxv4p0i32( %gep, i32 4, %mask, undef) ret %res @@ -40,7 +40,7 @@ define @scalar_index(i32* %base, i64 %index, [[RES]] ; %broadcast.splatinsert = insertelement poison, i32* %base, i32 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, undef, zeroinitializer + %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer %gep = getelementptr i32, %broadcast.splat, i64 %index %res = call @llvm.masked.gather.nxv4i32.nxv4p0i32( %gep, i32 4, %mask, undef) ret %res @@ -54,7 +54,7 @@ define @splat_index(i32* %base, i64 %index, ; CHECK-NEXT: ret [[RES]] ; %broadcast.splatinsert = insertelement poison, i64 %index, i32 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, undef, zeroinitializer + %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer %gep = getelementptr i32, i32* %base, %broadcast.splat %res = call @llvm.masked.gather.nxv4i32.nxv4p0i32( %gep, i32 4, %mask, undef) ret %res @@ -77,7 +77,7 @@ define @global_struct_splat( %mask) #0 { ; CHECK-NEXT: ret [[TMP1]] ; %1 = insertelement poison, %struct.a* @c, i32 0 - %2 = shufflevector %1, undef, zeroinitializer + %2 = shufflevector %1, poison, zeroinitializer %3 = getelementptr %struct.a, %2, zeroinitializer, i32 1 %4 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( %3, i32 4, %mask, undef) ret %4 @@ -90,7 +90,7 @@ define @splat_ptr_gather(i32* %ptr, %mask, ; CHECK-NEXT: ret [[TMP2]] ; %1 = insertelement poison, i32* %ptr, i32 0 - %2 = shufflevector %1, undef, zeroinitializer + %2 = shufflevector %1, poison, zeroinitializer %3 = call @llvm.masked.gather.nxv4i32.nxv4p0i32( %2, i32 4, %mask, %passthru) ret %3 } @@ -102,7 +102,7 @@ define void @splat_ptr_scatter(i32* %ptr, %mask, poison, i32* %ptr, i32 0 - %2 = shufflevector %1, undef, zeroinitializer + %2 = shufflevector %1, poison, zeroinitializer call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( %val, %2, i32 4, %mask) ret void } diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions-inseltpoison.ll new file mode 100644 index 0000000..863cbc0 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions-inseltpoison.ll @@ -0,0 +1,274 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -codegenprepare -S | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown" + +define <8 x i16> @sink_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) { +; CHECK-LABEL: @sink_zext( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] +; CHECK-NEXT: ret <8 x i16> [[RES_1]] +; CHECK: if.else: +; CHECK-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16> +; CHECK-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] +; CHECK-NEXT: ret <8 x i16> [[RES_2]] +; +entry: + 
%za = zext <8 x i8> %a to <8 x i16> + br i1 %c, label %if.then, label %if.else + +if.then: + %zb.1 = zext <8 x i8> %b to <8 x i16> + %res.1 = add <8 x i16> %za, %zb.1 + ret <8 x i16> %res.1 + +if.else: + %zb.2 = zext <8 x i8> %b to <8 x i16> + %res.2 = sub <8 x i16> %za, %zb.2 + ret <8 x i16> %res.2 +} + +define <8 x i16> @sink_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) { +; CHECK-LABEL: @sink_sext( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] +; CHECK-NEXT: ret <8 x i16> [[RES_1]] +; CHECK: if.else: +; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16> +; CHECK-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] +; CHECK-NEXT: ret <8 x i16> [[RES_2]] +; +entry: + %za = sext <8 x i8> %a to <8 x i16> + br i1 %c, label %if.then, label %if.else + +if.then: + %zb.1 = sext <8 x i8> %b to <8 x i16> + %res.1 = add <8 x i16> %za, %zb.1 + ret <8 x i16> %res.1 + +if.else: + %zb.2 = sext <8 x i8> %b to <8 x i16> + %res.2 = sub <8 x i16> %za, %zb.2 + ret <8 x i16> %res.2 +} + +define <8 x i16> @do_not_sink_nonfree_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) { +; CHECK-LABEL: @do_not_sink_nonfree_zext( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] +; CHECK-NEXT: ret <8 x i16> [[RES_1]] +; CHECK: if.else: +; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> +; CHECK-NEXT: ret <8 x i16> [[ZB_2]] +; +entry: + %za = sext <8 x i8> %a to <8 x i16> + br i1 %c, label %if.then, label %if.else + +if.then: + %zb.1 = sext <8 x i8> %b to <8 x i16> + %res.1 = add <8 x i16> %za, %zb.1 + ret <8 x i16> %res.1 + +if.else: + %zb.2 = sext <8 x i8> %b to <8 x i16> + ret <8 x i16> %zb.2 +} + +define <8 x i16> @do_not_sink_nonfree_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) { +; CHECK-LABEL: @do_not_sink_nonfree_sext( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] +; CHECK-NEXT: ret <8 x i16> [[RES_1]] +; CHECK: if.else: +; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> +; CHECK-NEXT: ret <8 x i16> [[ZB_2]] +; +entry: + %za = sext <8 x i8> %a to <8 x i16> + br i1 %c, label %if.then, label %if.else + +if.then: + %zb.1 = sext <8 x i8> %b to <8 x i16> + %res.1 = add <8 x i16> %za, %zb.1 + ret <8 x i16> %res.1 + +if.else: + %zb.2 = sext <8 x i8> %b to <8 x i16> + ret <8 x i16> %zb.2 +} + +; The masks used are suitable for umull, sink shufflevector to users. 
+define <8 x i16> @sink_shufflevector_umull(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: @sink_shufflevector_umull( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP0]], <8 x i8> [[S2]]) +; CHECK-NEXT: ret <8 x i16> [[VMULL0]] +; CHECK: if.else: +; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[S4]]) +; CHECK-NEXT: ret <8 x i16> [[VMULL1]] +; +entry: + %s1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + %s3 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + br i1 undef, label %if.then, label %if.else + +if.then: + %s2 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> + %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3 + ret <8 x i16> %vmull0 + +if.else: + %s4 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> + %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3 + ret <8 x i16> %vmull1 +} + +; Both exts and their shufflevector operands can be sunk. +define <8 x i16> @sink_shufflevector_ext_subadd(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: @sink_shufflevector_ext_subadd( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[S1]] to <8 x i16> +; CHECK-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP0]], [[Z2]] +; CHECK-NEXT: ret <8 x i16> [[RES1]] +; CHECK: if.else: +; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[S3]] to <8 x i16> +; CHECK-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP1]], [[Z4]] +; CHECK-NEXT: ret <8 x i16> [[RES2]] +; +entry: + %s1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + %z1 = zext <8 x i8> %s1 to <8 x i16> + %s3 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + %z3 = sext <8 x i8> %s3 to <8 x i16> + br i1 undef, label %if.then, label %if.else + +if.then: + %s2 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> + %z2 = zext <8 x i8> %s2 to <8 x i16> + %res1 = add <8 x i16> %z1, %z2 + ret <8 x i16> %res1 + +if.else: + %s4 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> + %z4 = sext <8 x i8> %s4 to <8 x i16> + %res2 = sub <8 x i16> %z3, %z4 + ret <8 x i16> %res2 +} + + +declare void @user1(<8 x i16>) + +; Both exts and their shufflevector operands can be sunk. 
+define <8 x i16> @sink_shufflevector_ext_subadd_multiuse(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: @sink_shufflevector_ext_subadd_multiuse( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[Z3:%.*]] = sext <8 x i8> [[S3]] to <8 x i16> +; CHECK-NEXT: call void @user1(<8 x i16> [[Z3]]) +; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[S1]] to <8 x i16> +; CHECK-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP0]], [[Z2]] +; CHECK-NEXT: ret <8 x i16> [[RES1]] +; CHECK: if.else: +; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[S3]] to <8 x i16> +; CHECK-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP1]], [[Z4]] +; CHECK-NEXT: ret <8 x i16> [[RES2]] +; +entry: + %s1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + %z1 = zext <8 x i8> %s1 to <8 x i16> + %s3 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + %z3 = sext <8 x i8> %s3 to <8 x i16> + call void @user1(<8 x i16> %z3) + br i1 undef, label %if.then, label %if.else + +if.then: + %s2 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> + %z2 = zext <8 x i8> %s2 to <8 x i16> + %res1 = add <8 x i16> %z1, %z2 + ret <8 x i16> %res1 + +if.else: + %s4 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> + %z4 = sext <8 x i8> %s4 to <8 x i16> + %res2 = sub <8 x i16> %z3, %z4 + ret <8 x i16> %res2 +} + + +; The masks used are not suitable for umull, do not sink. 
+define <8 x i16> @no_sink_shufflevector_umull(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: @no_sink_shufflevector_umull( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[S1]], <8 x i8> [[S2]]) +; CHECK-NEXT: ret <8 x i16> [[VMULL0]] +; CHECK: if.else: +; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[S3]], <8 x i8> [[S4]]) +; CHECK-NEXT: ret <8 x i16> [[VMULL1]] +; +entry: + %s1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + %s3 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + br i1 undef, label %if.then, label %if.else + +if.then: + %s2 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> + %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3 + ret <8 x i16> %vmull0 + +if.else: + %s4 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> + %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3 + ret <8 x i16> %vmull1 +} + + +; Function Attrs: nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) #2 diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-add-mul-shufflevector-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-add-mul-shufflevector-inseltpoison.ll index f43b8f6..9e92599 100644 --- a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-add-mul-shufflevector-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-add-mul-shufflevector-inseltpoison.ll @@ -4,10 +4,10 @@ define void @sink_add_mul(i32* %s1, i32 %x, i32* %d, i32 %n) { ; CHECK-LABEL: @sink_add_mul( ; CHECK: vector.ph: ; CHECK-NOT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0 -; CHECK-NOT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NOT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK: vector.body: ; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 -; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer ; entry: %cmp6 = icmp sgt i32 %n, 0 @@ -16,7 +16,7 @@ entry: vector.ph: ; preds = %for.body.preheader %n.vec = and i32 %n, -4 %broadcast.splatinsert8 = insertelement <4 x i32> poison, i32 %x, i32 0 - %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer + %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> poison, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -43,13 +43,13 @@ define void @sink_add_mul_multiple(i32* %s1, i32* %s2, i32 %x, i32* %d, i32* %d2 ; CHECK-LABEL: @sink_add_mul_multiple( ; CHECK: vector.ph: ; CHECK-NOT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> undef, i32 
[[X:%.*]], i32 0 -; CHECK-NOT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NOT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK: vector.body: ; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %x, i32 0 -; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK: mul nsw <4 x i32> %wide.load, [[TMP3]] ; CHECK: [[TMP2b:%.*]] = insertelement <4 x i32> poison, i32 %x, i32 0 -; CHECK: [[TMP3b:%.*]] = shufflevector <4 x i32> [[TMP2b]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: [[TMP3b:%.*]] = shufflevector <4 x i32> [[TMP2b]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK: mul nsw <4 x i32> %wide.load18, [[TMP3b]] ; entry: @@ -59,7 +59,7 @@ entry: vector.ph: ; preds = %for.body.preheader %n.vec = and i32 %n, -4 %broadcast.splatinsert15 = insertelement <4 x i32> poison, i32 %x, i32 0 - %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer + %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> poison, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -101,7 +101,7 @@ define void @sink_add_sub_unsinkable(i32* %s1, i32* %s2, i32 %x, i32* %d, i32* % ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N]], -4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT15]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT15]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; entry: @@ -111,7 +111,7 @@ entry: vector.ph: ; preds = %for.body.preheader %n.vec = and i32 %n, -4 %broadcast.splatinsert15 = insertelement <4 x i32> poison, i32 %x, i32 0 - %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer + %broadcast.splat16 = shufflevector <4 x i32> %broadcast.splatinsert15, <4 x i32> poison, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -148,10 +148,10 @@ define void @sink_sub(i32* %s1, i32 %x, i32* %d, i32 %n) { ; CHECK-LABEL: @sink_sub( ; CHECK: vector.ph: ; CHECK-NOT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 -; CHECK-NOT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NOT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT8]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK: vector.body: ; CHECK: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 -; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer ; entry: %cmp6 = icmp sgt i32 %n, 0 @@ -160,7 +160,7 @@ entry: vector.ph: ; preds = %for.body.preheader %n.vec = and i32 %n, -4 %broadcast.splatinsert8 = insertelement <4 x i32> poison, i32 %x, i32 0 - %broadcast.splat9 = shufflevector <4 x 
i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer + %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> poison, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph @@ -186,11 +186,11 @@ entry: ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N]], -4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT15]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT15]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NOT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 -; CHECK-NOT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NOT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer ; %cmp6 = icmp sgt i32 %n, 0 br i1 %cmp6, label %vector.ph, label %for.cond.cleanup @@ -198,7 +198,7 @@ entry: vector.ph: ; preds = %for.body.preheader %n.vec = and i32 %n, -4 %broadcast.splatinsert8 = insertelement <4 x i32> poison, i32 %x, i32 0 - %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> undef, <4 x i32> zeroinitializer + %broadcast.splat9 = shufflevector <4 x i32> %broadcast.splatinsert8, <4 x i32> poison, <4 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %vector.ph diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions-inseltpoison.ll new file mode 100644 index 0000000..fbaf7bf --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions-inseltpoison.ll @@ -0,0 +1,232 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=armv7-apple-darwin < %s -codegenprepare -S | FileCheck -check-prefix=NEON %s +; RUN: opt -mtriple=armv6-unknown-linux < %s -codegenprepare -S | FileCheck -check-prefix=NONEON %s + +define <8 x i16> @sink_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) { +; NEON-LABEL: @sink_zext( +; NEON-NEXT: entry: +; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NEON: if.then: +; NEON-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16> +; NEON-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16> +; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] +; NEON-NEXT: ret <8 x i16> [[RES_1]] +; NEON: if.else: +; NEON-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16> +; NEON-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16> +; NEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] +; NEON-NEXT: ret <8 x i16> [[RES_2]] +; +; NONEON-LABEL: @sink_zext( +; NONEON-NEXT: entry: +; NONEON-NEXT: [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16> +; NONEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NONEON: if.then: +; NONEON-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16> +; NONEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[ZB_1]] +; NONEON-NEXT: ret <8 x i16> [[RES_1]] +; NONEON: if.else: +; NONEON-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16> +; NONEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[ZA]], [[ZB_2]] +; NONEON-NEXT: ret <8 x i16> [[RES_2]] +; +entry: + %za = zext <8 x i8> %a to <8 x 
i16> + br i1 %c, label %if.then, label %if.else + +if.then: + %zb.1 = zext <8 x i8> %b to <8 x i16> + %res.1 = add <8 x i16> %za, %zb.1 + ret <8 x i16> %res.1 + +if.else: + %zb.2 = zext <8 x i8> %b to <8 x i16> + %res.2 = sub <8 x i16> %za, %zb.2 + ret <8 x i16> %res.2 +} + +define <8 x i16> @sink_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) { +; NEON-LABEL: @sink_sext( +; NEON-NEXT: entry: +; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NEON: if.then: +; NEON-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> +; NEON-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] +; NEON-NEXT: ret <8 x i16> [[RES_1]] +; NEON: if.else: +; NEON-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> +; NEON-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16> +; NEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] +; NEON-NEXT: ret <8 x i16> [[RES_2]] +; +; NONEON-LABEL: @sink_sext( +; NONEON-NEXT: entry: +; NONEON-NEXT: [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; NONEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NONEON: if.then: +; NONEON-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> +; NONEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[ZB_1]] +; NONEON-NEXT: ret <8 x i16> [[RES_1]] +; NONEON: if.else: +; NONEON-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> +; NONEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[ZA]], [[ZB_2]] +; NONEON-NEXT: ret <8 x i16> [[RES_2]] +; +entry: + %za = sext <8 x i8> %a to <8 x i16> + br i1 %c, label %if.then, label %if.else + +if.then: + %zb.1 = sext <8 x i8> %b to <8 x i16> + %res.1 = add <8 x i16> %za, %zb.1 + ret <8 x i16> %res.1 + +if.else: + %zb.2 = sext <8 x i8> %b to <8 x i16> + %res.2 = sub <8 x i16> %za, %zb.2 + ret <8 x i16> %res.2 +} + +define <8 x i16> @do_not_sink_nonfree_zext(<8 x i8> %a, <8 x i16> %b, i1 %c) { +; +; NEON-LABEL: @do_not_sink_nonfree_zext( +; NEON-NEXT: entry: +; NEON-NEXT: [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16> +; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NEON: if.then: +; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]] +; NEON-NEXT: ret <8 x i16> [[RES_1]] +; NEON: if.else: +; NEON-NEXT: ret <8 x i16> [[B]] +; +; NONEON-LABEL: @do_not_sink_nonfree_zext( +; NONEON-NEXT: entry: +; NONEON-NEXT: [[ZA:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16> +; NONEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NONEON: if.then: +; NONEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]] +; NONEON-NEXT: ret <8 x i16> [[RES_1]] +; NONEON: if.else: +; NONEON-NEXT: ret <8 x i16> [[B]] +; +entry: + %za = zext <8 x i8> %a to <8 x i16> + br i1 %c, label %if.then, label %if.else + +if.then: + %res.1 = add <8 x i16> %za, %b + ret <8 x i16> %res.1 + +if.else: + ret <8 x i16> %b +} + +define <8 x i16> @do_not_sink_nonfree_sext(<8 x i8> %a, <8 x i16> %b, i1 %c) { +; CHECK-LABEL: @do_not_sink_nonfree_sext( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] +; CHECK-NEXT: ret <8 x i16> [[RES_1]] +; CHECK: if.else: +; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> +; CHECK-NEXT: ret <8 x i16> [[ZB_2]] +; +; NEON-LABEL: @do_not_sink_nonfree_sext( +; 
NEON-NEXT: entry: +; NEON-NEXT: [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NEON: if.then: +; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]] +; NEON-NEXT: ret <8 x i16> [[RES_1]] +; NEON: if.else: +; NEON-NEXT: ret <8 x i16> [[B]] +; +; NONEON-LABEL: @do_not_sink_nonfree_sext( +; NONEON-NEXT: entry: +; NONEON-NEXT: [[ZA:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; NONEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NONEON: if.then: +; NONEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[ZA]], [[B:%.*]] +; NONEON-NEXT: ret <8 x i16> [[RES_1]] +; NONEON: if.else: +; NONEON-NEXT: ret <8 x i16> [[B]] +; +entry: + %za = sext <8 x i8> %a to <8 x i16> + br i1 %c, label %if.then, label %if.else + +if.then: + %res.1 = add <8 x i16> %za, %b + ret <8 x i16> %res.1 + +if.else: + ret <8 x i16> %b +} + +declare void @user1(<8 x i16>) + +; Exts can be sunk. +define <8 x i16> @sink_shufflevector_ext_subadd_multiuse(<16 x i8> %a, <16 x i8> %b) { +; NEON-LABEL: @sink_shufflevector_ext_subadd_multiuse( +; NEON-NEXT: entry: +; NEON-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> +; NEON-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> +; NEON-NEXT: [[Z3:%.*]] = sext <8 x i8> [[S3]] to <8 x i16> +; NEON-NEXT: call void @user1(<8 x i16> [[Z3]]) +; NEON-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NEON: if.then: +; NEON-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> +; NEON-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> +; NEON-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[S1]] to <8 x i16> +; NEON-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP0]], [[Z2]] +; NEON-NEXT: ret <8 x i16> [[RES1]] +; NEON: if.else: +; NEON-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> +; NEON-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> +; NEON-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[S3]] to <8 x i16> +; NEON-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP1]], [[Z4]] +; NEON-NEXT: ret <8 x i16> [[RES2]] +; +; NONEON-LABEL: @sink_shufflevector_ext_subadd_multiuse( +; NONEON-NEXT: entry: +; NONEON-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> +; NONEON-NEXT: [[Z1:%.*]] = zext <8 x i8> [[S1]] to <8 x i16> +; NONEON-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> +; NONEON-NEXT: [[Z3:%.*]] = sext <8 x i8> [[S3]] to <8 x i16> +; NONEON-NEXT: call void @user1(<8 x i16> [[Z3]]) +; NONEON-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; NONEON: if.then: +; NONEON-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> +; NONEON-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> +; NONEON-NEXT: [[RES1:%.*]] = add <8 x i16> [[Z1]], [[Z2]] +; NONEON-NEXT: ret <8 x i16> [[RES1]] +; NONEON: if.else: +; NONEON-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> +; NONEON-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> +; NONEON-NEXT: [[RES2:%.*]] = sub <8 x i16> [[Z3]], [[Z4]] +; NONEON-NEXT: ret <8 x i16> [[RES2]] +; +entry: + %s1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + %z1 = zext <8 x i8> %s1 to <8 x i16> + %s3 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> + %z3 = sext <8 x i8> %s3 to <8 x i16> + call void @user1(<8 x i16> %z3) + br i1 undef, label %if.then, label %if.else + +if.then: + %s2 = shufflevector <16 x i8> %b, 
<16 x i8> poison, <8 x i32> + %z2 = zext <8 x i8> %s2 to <8 x i16> + %res1 = add <8 x i16> %z1, %z2 + ret <8 x i16> %res1 + +if.else: + %s4 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> + %z4 = sext <8 x i8> %s4 to <8 x i16> + %res2 = sub <8 x i16> %z3, %z4 + ret <8 x i16> %res2 +} diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sinkchain-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sinkchain-inseltpoison.ll index 7cffede..9930b03 100644 --- a/llvm/test/Transforms/CodeGenPrepare/ARM/sinkchain-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sinkchain-inseltpoison.ll @@ -15,7 +15,7 @@ define signext i8 @dead(i16* noalias nocapture readonly %s1, i16 zeroext %x, i8* ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[L7]], align 2 ; CHECK-NEXT: [[L8:%.*]] = trunc <8 x i16> [[WIDE_LOAD]] to <8 x i8> ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[L9:%.*]] = mul <8 x i8> [[TMP2]], [[L8]] ; CHECK-NEXT: [[L13:%.*]] = getelementptr inbounds i8, i8* [[D:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[L14:%.*]] = bitcast i8* [[L13]] to <8 x i8>* @@ -30,7 +30,7 @@ entry: %n.vec = and i32 %n, -8 %l0 = trunc i16 %x to i8 %l1 = insertelement <8 x i8> poison, i8 %l0, i32 0 - %broadcast.splat26 = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> zeroinitializer + %broadcast.splat26 = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> zeroinitializer br label %vector.body vector.body: ; preds = %vector.body, %entry @@ -58,7 +58,7 @@ define signext i8 @alive(i16* noalias nocapture readonly %s1, i16 zeroext %x, i8 ; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N:%.*]], -8 ; CHECK-NEXT: [[L0:%.*]] = trunc i16 [[X:%.*]] to i8 ; CHECK-NEXT: [[L1:%.*]] = insertelement <8 x i8> poison, i8 [[L0]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT26:%.*]] = shufflevector <8 x i8> [[L1]], <8 x i8> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLAT26:%.*]] = shufflevector <8 x i8> [[L1]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[L2:%.*]] = sub <8 x i8> zeroinitializer, [[BROADCAST_SPLAT26]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -69,7 +69,7 @@ define signext i8 @alive(i16* noalias nocapture readonly %s1, i16 zeroext %x, i8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[L7]], align 2 ; CHECK-NEXT: [[L8:%.*]] = trunc <8 x i16> [[WIDE_LOAD]] to <8 x i8> ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[L9:%.*]] = mul <8 x i8> [[TMP2]], [[L8]] ; CHECK-NEXT: [[L13:%.*]] = getelementptr inbounds i8, i8* [[D:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[L14:%.*]] = bitcast i8* [[L13]] to <8 x i8>* @@ -84,7 +84,7 @@ entry: %n.vec = and i32 %n, -8 %l0 = trunc i16 %x to i8 %l1 = insertelement <8 x i8> poison, i8 %l0, i32 0 - %broadcast.splat26 = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> zeroinitializer + %broadcast.splat26 = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> zeroinitializer %l2 = sub <8 x i8> zeroinitializer, %broadcast.splat26 br label %vector.body diff --git 
a/llvm/test/Transforms/CodeGenPrepare/X86/cgp_shuffle_crash-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/cgp_shuffle_crash-inseltpoison.ll new file mode 100644 index 0000000..9eede8c --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/cgp_shuffle_crash-inseltpoison.ll @@ -0,0 +1,14 @@ +; RUN: opt -codegenprepare -S %s | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: shuffle_one_source + +define <2 x i8> @shuffle_one_source(i32 %x) { + %Shuf = shufflevector <2 x i8> zeroinitializer, <2 x i8> zeroinitializer, <2 x i32> poison + %Cmp = icmp slt i32 480483, %x + %B = mul <2 x i8> %Shuf, %Shuf + %S = select i1 %Cmp, <2 x i8> %B, <2 x i8> zeroinitializer + ret <2 x i8> %Shuf +} + diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll index 88967ac..a1fd88e 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt-inseltpoison.ll @@ -16,7 +16,7 @@ define <4 x i32> @splat_base(i32* %base, <4 x i64> %index) { ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x i32*> poison, i32* %base, i32 0 - %broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> undef, <4 x i32> zeroinitializer + %broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> poison, <4 x i32> zeroinitializer %gep = getelementptr i32, <4 x i32*> %broadcast.splat, <4 x i64> %index %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> , <4 x i32> undef) ret <4 x i32> %res @@ -42,7 +42,7 @@ define <4 x i32> @scalar_index(i32* %base, i64 %index) { ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x i32*> poison, i32* %base, i32 0 - %broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> undef, <4 x i32> zeroinitializer + %broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> poison, <4 x i32> zeroinitializer %gep = getelementptr i32, <4 x i32*> %broadcast.splat, i64 %index %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> , <4 x i32> undef) ret <4 x i32> %res @@ -56,7 +56,7 @@ define <4 x i32> @splat_index(i32* %base, i64 %index) { ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %index, i32 0 - %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer + %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer %gep = getelementptr i32, i32* %base, <4 x i64> %broadcast.splat %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> , <4 x i32> undef) ret <4 x i32> %res @@ -79,7 +79,7 @@ define <4 x i32> @global_struct_splat() { ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %1 = insertelement <4 x %struct.a*> poison, %struct.a* @c, i32 0 - %2 = shufflevector <4 x %struct.a*> %1, <4 x %struct.a*> undef, <4 x i32> zeroinitializer + %2 = shufflevector <4 x %struct.a*> %1, <4 x %struct.a*> poison, <4 x i32> zeroinitializer %3 = getelementptr %struct.a, <4 x %struct.a*> %2, <4 x i64> zeroinitializer, i32 1 %4 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %3, i32 4, <4 x i1> , <4 x i32> undef) ret <4 x i32> %4 @@ -92,7 +92,7 @@ define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> 
%mask, <4 x i32> %passthr ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = insertelement <4 x i32*> poison, i32* %ptr, i32 0 - %2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer + %2 = shufflevector <4 x i32*> %1, <4 x i32*> poison, <4 x i32> zeroinitializer %3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> %mask, <4 x i32> %passthru) ret <4 x i32> %3 } @@ -104,7 +104,7 @@ define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) { ; CHECK-NEXT: ret void ; %1 = insertelement <4 x i32*> poison, i32* %ptr, i32 0 - %2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer + %2 = shufflevector <4 x i32*> %1, <4 x i32*> poison, <4 x i32> zeroinitializer call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %2, i32 4, <4 x i1> %mask) ret void } diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll index 1d26bee..1c9617a 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll @@ -8,8 +8,8 @@ define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; AVX1-LABEL: @vector_variable_shift_right_v4i32( -; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer -; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer +; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX1-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] ; AVX1-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SPLAT1]] ; AVX1-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[Z]], [[SPLAT2]] @@ -17,28 +17,29 @@ define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x ; AVX1-NEXT: ret <4 x i32> [[TMP3]] ; ; AVX2-LABEL: @vector_variable_shift_right_v4i32( -; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer -; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer +; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX2-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] ; AVX2-NEXT: [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]] ; AVX2-NEXT: ret <4 x i32> [[SH]] ; ; AVX512BW-LABEL: @vector_variable_shift_right_v4i32( -; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer -; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer +; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX512BW-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] ; AVX512BW-NEXT: [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]] ; AVX512BW-NEXT: ret <4 x i32> 
[[SH]] ; ; XOP-LABEL: @vector_variable_shift_right_v4i32( -; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer -; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer +; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer +; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer ; XOP-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] ; XOP-NEXT: [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]] ; XOP-NEXT: ret <4 x i32> [[SH]] ; - %splat1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer - %splat2 = shufflevector <4 x i32> %y, <4 x i32> undef, <4 x i32> zeroinitializer + + %splat1 = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer + %splat2 = shufflevector <4 x i32> %y, <4 x i32> poison, <4 x i32> zeroinitializer %sel = select <4 x i1> %cond, <4 x i32> %splat1, <4 x i32> %splat2 %sh = lshr <4 x i32> %z, %sel ret <4 x i32> %sh @@ -46,8 +47,8 @@ define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16> %x, <16 x i16> %y, <16 x i16> %z) { ; AVX1-LABEL: @vector_variable_shift_right_v16i16( -; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]] ; AVX1-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SPLAT1]] ; AVX1-NEXT: [[TMP2:%.*]] = lshr <16 x i16> [[Z]], [[SPLAT2]] @@ -55,8 +56,8 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16 ; AVX1-NEXT: ret <16 x i16> [[TMP3]] ; ; AVX2-LABEL: @vector_variable_shift_right_v16i16( -; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]] ; AVX2-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SPLAT1]] ; AVX2-NEXT: [[TMP2:%.*]] = lshr <16 x i16> [[Z]], [[SPLAT2]] @@ -64,21 +65,22 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16 ; AVX2-NEXT: ret <16 x i16> [[TMP3]] ; ; AVX512BW-LABEL: @vector_variable_shift_right_v16i16( -; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer -; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer +; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x 
i16> [[Y:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer ; AVX512BW-NEXT: [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]] ; AVX512BW-NEXT: [[SH:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SEL]] ; AVX512BW-NEXT: ret <16 x i16> [[SH]] ; ; XOP-LABEL: @vector_variable_shift_right_v16i16( -; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer -; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer +; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <16 x i16> [[X:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer +; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <16 x i16> [[Y:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer ; XOP-NEXT: [[SEL:%.*]] = select <16 x i1> [[COND:%.*]], <16 x i16> [[SPLAT1]], <16 x i16> [[SPLAT2]] ; XOP-NEXT: [[SH:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SEL]] ; XOP-NEXT: ret <16 x i16> [[SH]] ; - %splat1 = shufflevector <16 x i16> %x, <16 x i16> undef, <16 x i32> zeroinitializer - %splat2 = shufflevector <16 x i16> %y, <16 x i16> undef, <16 x i32> zeroinitializer + + %splat1 = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> zeroinitializer + %splat2 = shufflevector <16 x i16> %y, <16 x i16> poison, <16 x i32> zeroinitializer %sel = select <16 x i1> %cond, <16 x i16> %splat1, <16 x i16> %splat2 %sh = lshr <16 x i16> %z, %sel ret <16 x i16> %sh @@ -86,14 +88,15 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16 define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) { ; ALL-LABEL: @vector_variable_shift_right_v32i8( -; ALL-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer -; ALL-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; ALL-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; ALL-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer ; ALL-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] ; ALL-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] ; ALL-NEXT: ret <32 x i8> [[SH]] ; - %splat1 = shufflevector <32 x i8> %x, <32 x i8> undef, <32 x i32> zeroinitializer - %splat2 = shufflevector <32 x i8> %y, <32 x i8> undef, <32 x i32> zeroinitializer + + %splat1 = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> zeroinitializer + %splat2 = shufflevector <32 x i8> %y, <32 x i8> poison, <32 x i32> zeroinitializer %sel = select <32 x i1> %cond, <32 x i8> %splat1, <32 x i8> %splat2 %sh = lshr <32 x i8> %z, %sel ret <32 x i8> %sh @@ -110,11 +113,11 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; AVX1: vector.ph: ; AVX1-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 ; AVX1-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> poison, i32 [[AMT0:%.*]], i32 0 -; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX1-NEXT: [[SPLATINSERT20:%.*]] = insertelement <4 x i32> poison, i32 [[AMT1:%.*]], i32 0 -; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: [[SPLAT2:%.*]] = 
shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX1-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 -; AVX1-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX1-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX1: vector.body: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -123,9 +126,9 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 ; AVX1-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer ; AVX1-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[SPLAT1]], <4 x i32> [[SPLAT2]] -; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX1-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP4]] -; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX1-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX1-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[SPLAT3]], [[TMP6]] ; AVX1-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP5]], <4 x i32> [[TMP7]] ; AVX1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]] @@ -145,11 +148,11 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; AVX2: vector.ph: ; AVX2-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 ; AVX2-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> poison, i32 [[AMT0:%.*]], i32 0 -; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX2-NEXT: [[SPLATINSERT20:%.*]] = insertelement <4 x i32> poison, i32 [[AMT1:%.*]], i32 0 -; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX2-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 -; AVX2-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX2-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX2: vector.body: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -176,11 +179,11 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; AVX512BW: vector.ph: ; AVX512BW-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 ; AVX512BW-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> poison, i32 [[AMT0:%.*]], i32 0 -; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> poison, <4 x i32> 
zeroinitializer ; AVX512BW-NEXT: [[SPLATINSERT20:%.*]] = insertelement <4 x i32> poison, i32 [[AMT1:%.*]], i32 0 -; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX512BW-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 -; AVX512BW-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer +; AVX512BW-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX512BW-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX512BW: vector.body: ; AVX512BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -207,11 +210,11 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; XOP: vector.ph: ; XOP-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 ; XOP-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> poison, i32 [[AMT0:%.*]], i32 0 -; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> undef, <4 x i32> zeroinitializer +; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <4 x i32> [[SPLATINSERT18]], <4 x i32> poison, <4 x i32> zeroinitializer ; XOP-NEXT: [[SPLATINSERT20:%.*]] = insertelement <4 x i32> poison, i32 [[AMT1:%.*]], i32 0 -; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> undef, <4 x i32> zeroinitializer +; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <4 x i32> [[SPLATINSERT20]], <4 x i32> poison, <4 x i32> zeroinitializer ; XOP-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0 -; XOP-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> undef, <4 x i32> zeroinitializer +; XOP-NEXT: [[SPLAT3:%.*]] = shufflevector <4 x i32> [[SPLATINSERT22]], <4 x i32> poison, <4 x i32> zeroinitializer ; XOP-NEXT: br label [[VECTOR_BODY:%.*]] ; XOP: vector.body: ; XOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -230,6 +233,7 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; XOP: exit: ; XOP-NEXT: ret void ; + entry: %cmp16 = icmp sgt i32 %count, 0 %wide.trip.count = zext i32 %count to i64 @@ -238,11 +242,11 @@ entry: vector.ph: %n.vec = and i64 %wide.trip.count, 4294967292 %splatinsert18 = insertelement <4 x i32> poison, i32 %amt0, i32 0 - %splat1 = shufflevector <4 x i32> %splatinsert18, <4 x i32> undef, <4 x i32> zeroinitializer + %splat1 = shufflevector <4 x i32> %splatinsert18, <4 x i32> poison, <4 x i32> zeroinitializer %splatinsert20 = insertelement <4 x i32> poison, i32 %amt1, i32 0 - %splat2 = shufflevector <4 x i32> %splatinsert20, <4 x i32> undef, <4 x i32> zeroinitializer + %splat2 = shufflevector <4 x i32> %splatinsert20, <4 x i32> poison, <4 x i32> zeroinitializer %splatinsert22 = insertelement <4 x i32> poison, i32 %x, i32 0 - %splat3 = shufflevector <4 x i32> %splatinsert22, <4 x i32> undef, <4 x i32> zeroinitializer + %splat3 = shufflevector <4 x i32> %splatinsert22, <4 x i32> poison, <4 x i32> zeroinitializer br label %vector.body vector.body: @@ -272,9 +276,9 @@ define void @fancierRotate2(i32* %arr, i8* %control, i32 %rot0, i32 %rot1) { ; AVX1-LABEL: @fancierRotate2( ; AVX1-NEXT: entry: ; AVX1-NEXT: [[I0:%.*]] = insertelement <8 x i32> poison, i32 [[ROT0:%.*]], i32 0 -; AVX1-NEXT: [[S0:%.*]] = 
shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: [[I1:%.*]] = insertelement <8 x i32> poison, i32 [[ROT1:%.*]], i32 0 -; AVX1-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: br label [[LOOP:%.*]] ; AVX1: loop: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] @@ -286,9 +290,9 @@ define void @fancierRotate2(i32* %arr, i8* %control, i32 %rot0, i32 %rot1) { ; AVX1-NEXT: [[T4:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]] ; AVX1-NEXT: [[T5:%.*]] = bitcast i32* [[T4]] to <8 x i32>* ; AVX1-NEXT: [[WIDE_LOAD21:%.*]] = load <8 x i32>, <8 x i32>* [[T5]], align 4 -; AVX1-NEXT: [[TMP0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: [[TMP0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[TMP0]]) -; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer +; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD21]], <8 x i32> [[WIDE_LOAD21]], <8 x i32> [[TMP2]]) ; AVX1-NEXT: [[TMP4:%.*]] = select <8 x i1> [[T2]], <8 x i32> [[TMP1]], <8 x i32> [[TMP3]] ; AVX1-NEXT: store <8 x i32> [[TMP4]], <8 x i32>* [[T5]], align 4 @@ -301,9 +305,9 @@ define void @fancierRotate2(i32* %arr, i8* %control, i32 %rot0, i32 %rot1) { ; AVX2-LABEL: @fancierRotate2( ; AVX2-NEXT: entry: ; AVX2-NEXT: [[I0:%.*]] = insertelement <8 x i32> poison, i32 [[ROT0:%.*]], i32 0 -; AVX2-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: [[I1:%.*]] = insertelement <8 x i32> poison, i32 [[ROT1:%.*]], i32 0 -; AVX2-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer +; AVX2-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: br label [[LOOP:%.*]] ; AVX2: loop: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] @@ -326,9 +330,9 @@ define void @fancierRotate2(i32* %arr, i8* %control, i32 %rot0, i32 %rot1) { ; AVX512BW-LABEL: @fancierRotate2( ; AVX512BW-NEXT: entry: ; AVX512BW-NEXT: [[I0:%.*]] = insertelement <8 x i32> poison, i32 [[ROT0:%.*]], i32 0 -; AVX512BW-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512BW-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> poison, <8 x i32> zeroinitializer ; AVX512BW-NEXT: [[I1:%.*]] = insertelement <8 x i32> poison, i32 [[ROT1:%.*]], i32 0 -; AVX512BW-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer +; AVX512BW-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> poison, <8 x i32> zeroinitializer ; AVX512BW-NEXT: br label [[LOOP:%.*]] ; AVX512BW: loop: ; AVX512BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] @@ -351,9 
+355,9 @@ define void @fancierRotate2(i32* %arr, i8* %control, i32 %rot0, i32 %rot1) { ; XOP-LABEL: @fancierRotate2( ; XOP-NEXT: entry: ; XOP-NEXT: [[I0:%.*]] = insertelement <8 x i32> poison, i32 [[ROT0:%.*]], i32 0 -; XOP-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> undef, <8 x i32> zeroinitializer +; XOP-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[I0]], <8 x i32> poison, <8 x i32> zeroinitializer ; XOP-NEXT: [[I1:%.*]] = insertelement <8 x i32> poison, i32 [[ROT1:%.*]], i32 0 -; XOP-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> undef, <8 x i32> zeroinitializer +; XOP-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[I1]], <8 x i32> poison, <8 x i32> zeroinitializer ; XOP-NEXT: br label [[LOOP:%.*]] ; XOP: loop: ; XOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] @@ -373,11 +377,12 @@ define void @fancierRotate2(i32* %arr, i8* %control, i32 %rot0, i32 %rot1) { ; XOP: exit: ; XOP-NEXT: ret void ; + entry: %i0 = insertelement <8 x i32> poison, i32 %rot0, i32 0 - %s0 = shufflevector <8 x i32> %i0, <8 x i32> undef, <8 x i32> zeroinitializer + %s0 = shufflevector <8 x i32> %i0, <8 x i32> poison, <8 x i32> zeroinitializer %i1 = insertelement <8 x i32> poison, i32 %rot1, i32 0 - %s1 = shufflevector <8 x i32> %i1, <8 x i32> undef, <8 x i32> zeroinitializer + %s1 = shufflevector <8 x i32> %i1, <8 x i32> poison, <8 x i32> zeroinitializer br label %loop loop: diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll index 4e9f09f..be3723b 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll @@ -9,7 +9,7 @@ target triple = "x86_64-apple-darwin10.9.0" define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) { ; CHECK-LABEL: @test_8bit( -; CHECK-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK: if_true: ; CHECK-NEXT: ret <16 x i8> [[MASK]] @@ -17,7 +17,7 @@ define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) { ; CHECK-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] ; CHECK-NEXT: ret <16 x i8> [[RES]] ; - %mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer + %mask = shufflevector <16 x i8> %tmp, <16 x i8> poison, <16 x i32> zeroinitializer br i1 %tst, label %if_true, label %if_false if_true: @@ -30,17 +30,17 @@ if_false: define <8 x i16> @test_16bit(<8 x i16> %lhs, <8 x i16> %tmp, i1 %tst) { ; CHECK-SSE2-LABEL: @test_16bit( -; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-SSE2: if_true: ; CHECK-SSE2-NEXT: ret <8 x i16> [[MASK]] ; CHECK-SSE2: if_false: -; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-SSE2-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[TMP1]] ; 
CHECK-SSE2-NEXT: ret <8 x i16> [[RES]] ; ; CHECK-XOP-LABEL: @test_16bit( -; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-XOP: if_true: ; CHECK-XOP-NEXT: ret <8 x i16> [[MASK]] @@ -49,17 +49,17 @@ define <8 x i16> @test_16bit(<8 x i16> %lhs, <8 x i16> %tmp, i1 %tst) { ; CHECK-XOP-NEXT: ret <8 x i16> [[RES]] ; ; CHECK-AVX2-LABEL: @test_16bit( -; CHECK-AVX2-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-AVX2-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-AVX2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-AVX2: if_true: ; CHECK-AVX2-NEXT: ret <8 x i16> [[MASK]] ; CHECK-AVX2: if_false: -; CHECK-AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-AVX2-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[TMP1]] ; CHECK-AVX2-NEXT: ret <8 x i16> [[RES]] ; ; CHECK-AVX512BW-LABEL: @test_16bit( -; CHECK-AVX512BW-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-AVX512BW-NEXT: [[MASK:%.*]] = shufflevector <8 x i16> [[TMP:%.*]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-AVX512BW-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-AVX512BW: if_true: ; CHECK-AVX512BW-NEXT: ret <8 x i16> [[MASK]] @@ -67,7 +67,7 @@ define <8 x i16> @test_16bit(<8 x i16> %lhs, <8 x i16> %tmp, i1 %tst) { ; CHECK-AVX512BW-NEXT: [[RES:%.*]] = shl <8 x i16> [[LHS:%.*]], [[MASK]] ; CHECK-AVX512BW-NEXT: ret <8 x i16> [[RES]] ; - %mask = shufflevector <8 x i16> %tmp, <8 x i16> undef, <8 x i32> zeroinitializer + %mask = shufflevector <8 x i16> %tmp, <8 x i16> poison, <8 x i32> zeroinitializer br i1 %tst, label %if_true, label %if_false if_true: @@ -80,7 +80,7 @@ if_false: define <4 x i32> @test_notsplat(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { ; CHECK-LABEL: @test_notsplat( -; CHECK-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK: if_true: ; CHECK-NEXT: ret <4 x i32> [[MASK]] @@ -88,7 +88,7 @@ define <4 x i32> @test_notsplat(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { ; CHECK-NEXT: [[RES:%.*]] = shl <4 x i32> [[LHS:%.*]], [[MASK]] ; CHECK-NEXT: ret <4 x i32> [[RES]] ; - %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> + %mask = shufflevector <4 x i32> %tmp, <4 x i32> poison, <4 x i32> br i1 %tst, label %if_true, label %if_false if_true: @@ -101,17 +101,17 @@ if_false: define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { ; CHECK-SSE2-LABEL: @test_32bit( -; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-SSE2: if_true: ; CHECK-SSE2-NEXT: ret <4 x i32> [[MASK]] ; 
CHECK-SSE2: if_false: -; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <4 x i32> +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> poison, <4 x i32> ; CHECK-SSE2-NEXT: [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[TMP1]] ; CHECK-SSE2-NEXT: ret <4 x i32> [[RES]] ; ; CHECK-XOP-LABEL: @test_32bit( -; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-XOP: if_true: ; CHECK-XOP-NEXT: ret <4 x i32> [[MASK]] @@ -120,7 +120,7 @@ define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { ; CHECK-XOP-NEXT: ret <4 x i32> [[RES]] ; ; CHECK-AVX-LABEL: @test_32bit( -; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-AVX: if_true: ; CHECK-AVX-NEXT: ret <4 x i32> [[MASK]] @@ -128,7 +128,7 @@ define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { ; CHECK-AVX-NEXT: [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[MASK]] ; CHECK-AVX-NEXT: ret <4 x i32> [[RES]] ; - %mask = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> + %mask = shufflevector <4 x i32> %tmp, <4 x i32> poison, <4 x i32> br i1 %tst, label %if_true, label %if_false if_true: @@ -141,17 +141,17 @@ if_false: define <2 x i64> @test_64bit(<2 x i64> %lhs, <2 x i64> %tmp, i1 %tst) { ; CHECK-SSE2-LABEL: @test_64bit( -; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-SSE2: if_true: ; CHECK-SSE2-NEXT: ret <2 x i64> [[MASK]] ; CHECK-SSE2: if_false: -; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-SSE2-NEXT: [[RES:%.*]] = lshr <2 x i64> [[LHS:%.*]], [[TMP1]] ; CHECK-SSE2-NEXT: ret <2 x i64> [[RES]] ; ; CHECK-XOP-LABEL: @test_64bit( -; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-XOP: if_true: ; CHECK-XOP-NEXT: ret <2 x i64> [[MASK]] @@ -160,7 +160,7 @@ define <2 x i64> @test_64bit(<2 x i64> %lhs, <2 x i64> %tmp, i1 %tst) { ; CHECK-XOP-NEXT: ret <2 x i64> [[RES]] ; ; CHECK-AVX-LABEL: @test_64bit( -; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <2 x i64> [[TMP:%.*]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-AVX: if_true: ; CHECK-AVX-NEXT: ret <2 x i64> [[MASK]] @@ -168,7 +168,7 @@ define <2 x i64> @test_64bit(<2 x i64> %lhs, <2 x i64> %tmp, i1 %tst) { ; CHECK-AVX-NEXT: [[RES:%.*]] = lshr 
<2 x i64> [[LHS:%.*]], [[MASK]] ; CHECK-AVX-NEXT: ret <2 x i64> [[RES]] ; - %mask = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer + %mask = shufflevector <2 x i64> %tmp, <2 x i64> poison, <2 x i32> zeroinitializer br i1 %tst, label %if_true, label %if_false if_true: @@ -189,7 +189,7 @@ define void @funnel_splatvar(i32* nocapture %arr, i32 %rot) { ; CHECK-SSE2-NEXT: [[T0:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[INDEX]] ; CHECK-SSE2-NEXT: [[T1:%.*]] = bitcast i32* [[T0]] to <8 x i32>* ; CHECK-SSE2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[T1]], align 4 -; CHECK-SSE2-NEXT: [[TMP0:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT15]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-SSE2-NEXT: [[TMP0:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT15]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-SSE2-NEXT: [[T2:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[WIDE_LOAD]], <8 x i32> [[WIDE_LOAD]], <8 x i32> [[TMP0]]) ; CHECK-SSE2-NEXT: store <8 x i32> [[T2]], <8 x i32>* [[T1]], align 4 ; CHECK-SSE2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 @@ -201,7 +201,7 @@ define void @funnel_splatvar(i32* nocapture %arr, i32 %rot) { ; CHECK-XOP-LABEL: @funnel_splatvar( ; CHECK-XOP-NEXT: entry: ; CHECK-XOP-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <8 x i32> poison, i32 [[ROT:%.*]], i32 0 -; CHECK-XOP-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT15]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-XOP-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT15]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-XOP-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-XOP: vector.body: ; CHECK-XOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -219,7 +219,7 @@ define void @funnel_splatvar(i32* nocapture %arr, i32 %rot) { ; CHECK-AVX-LABEL: @funnel_splatvar( ; CHECK-AVX-NEXT: entry: ; CHECK-AVX-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <8 x i32> poison, i32 [[ROT:%.*]], i32 0 -; CHECK-AVX-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT15]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-AVX-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT15]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-AVX-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-AVX: vector.body: ; CHECK-AVX-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -236,7 +236,7 @@ define void @funnel_splatvar(i32* nocapture %arr, i32 %rot) { ; entry: %broadcast.splatinsert15 = insertelement <8 x i32> poison, i32 %rot, i32 0 - %broadcast.splat16 = shufflevector <8 x i32> %broadcast.splatinsert15, <8 x i32> undef, <8 x i32> zeroinitializer + %broadcast.splat16 = shufflevector <8 x i32> %broadcast.splatinsert15, <8 x i32> poison, <8 x i32> zeroinitializer br label %vector.body vector.body: diff --git a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store-inseltpoison.ll b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store-inseltpoison.ll new file mode 100644 index 0000000..4f35925 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store-inseltpoison.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -tbaa -dse -enable-dse-memoryssa=false -S < %s | FileCheck %s +; RUN: opt -tbaa -dse -enable-dse-memoryssa=true -S < %s | FileCheck %s 
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+
+define dllexport i32 @f0(i8** %a0, i8** %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) #0 {
+; CHECK-LABEL: @f0(
+; CHECK-NEXT: b0:
+; CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds i8*, i8** [[A0:%.*]], i32 [[A2:%.*]]
+; CHECK-NEXT: [[V1:%.*]] = load i8*, i8** [[V0]], align 4, [[TBAA0:!tbaa !.*]]
+; CHECK-NEXT: [[V2:%.*]] = getelementptr i8, i8* [[V1]], i32 [[A3:%.*]]
+; CHECK-NEXT: [[V3:%.*]] = bitcast i8* [[V2]] to <128 x i8>*
+; CHECK-NEXT: [[V6:%.*]] = getelementptr inbounds i8*, i8** [[A1:%.*]], i32 [[A4:%.*]]
+; CHECK-NEXT: [[V7:%.*]] = load i8*, i8** [[V6]], align 4, [[TBAA3:!tbaa !.*]]
+; CHECK-NEXT: [[V8:%.*]] = getelementptr i8, i8* [[V7]], i32 [[A5:%.*]]
+; CHECK-NEXT: [[V9:%.*]] = bitcast i8* [[V8]] to <128 x i8>*
+; CHECK-NEXT: [[V10:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[V9]], i32 32, <128 x i1> , <128 x i8> undef), [[TBAA5:!tbaa !.*]]
+; CHECK-NEXT: [[V11:%.*]] = shufflevector <128 x i8> [[V10]], <128 x i8> poison, <32 x i32> 
+; CHECK-NEXT: [[V14:%.*]] = shufflevector <32 x i8> [[V11]], <32 x i8> poison, <128 x i32> 
+; CHECK-NEXT: [[V16:%.*]] = shufflevector <128 x i8> [[V14]], <128 x i8> poison, <32 x i32> 
+; CHECK-NEXT: [[V17:%.*]] = getelementptr inbounds i8*, i8** [[A1]], i32 [[A6:%.*]]
+; CHECK-NEXT: [[V18:%.*]] = load i8*, i8** [[V17]], align 4, [[TBAA3]]
+; CHECK-NEXT: [[V19:%.*]] = getelementptr i8, i8* [[V18]], i32 [[A7:%.*]]
+; CHECK-NEXT: [[V20:%.*]] = bitcast i8* [[V19]] to <128 x i8>*
+; CHECK-NEXT: [[V21:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[V20]], i32 32, <128 x i1> , <128 x i8> undef), [[TBAA5]]
+; CHECK-NEXT: [[V22:%.*]] = shufflevector <128 x i8> [[V21]], <128 x i8> poison, <32 x i32> 
+; CHECK-NEXT: [[V23:%.*]] = icmp ugt <32 x i8> [[V16]], [[V22]]
+; CHECK-NEXT: [[V24:%.*]] = select <32 x i1> [[V23]], <32 x i8> [[V16]], <32 x i8> [[V22]]
+; CHECK-NEXT: [[V25:%.*]] = shufflevector <32 x i8> [[V24]], <32 x i8> poison, <128 x i32> 
+; CHECK-NEXT: tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[V25]], <128 x i8>* [[V3]], i32 32, <128 x i1> ), [[TBAA8:!tbaa !.*]]
+; CHECK-NEXT: ret i32 0
+;
+b0:
+ %v0 = getelementptr inbounds i8*, i8** %a0, i32 %a2
+ %v1 = load i8*, i8** %v0, align 4, !tbaa !0
+ %v2 = getelementptr i8, i8* %v1, i32 %a3
+ %v3 = bitcast i8* %v2 to <128 x i8>*
+ tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> , <128 x i8>* %v3, i32 32, <128 x i1> ), !tbaa !3
+ %v6 = getelementptr inbounds i8*, i8** %a1, i32 %a4
+ %v7 = load i8*, i8** %v6, align 4, !tbaa !6
+ %v8 = getelementptr i8, i8* %v7, i32 %a5
+ %v9 = bitcast i8* %v8 to <128 x i8>*
+ %v10 = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %v9, i32 32, <128 x i1> , <128 x i8> undef), !tbaa !8
+ %v11 = shufflevector <128 x i8> %v10, <128 x i8> poison, <32 x i32> 
+ %v14 = shufflevector <32 x i8> %v11, <32 x i8> poison, <128 x i32> 
+ tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %v14, <128 x i8>* %v3, i32 32, <128 x i1> ), !tbaa !3
+ %v16 = shufflevector <128 x i8> %v14, <128 x i8> poison, <32 x i32> 
+ %v17 = getelementptr inbounds i8*, i8** %a1, i32 %a6
+ %v18 = load i8*, i8** %v17, align 4, !tbaa !6
+ %v19 = getelementptr i8, i8* %v18, i32 %a7
+ %v20 = bitcast i8* %v19 to <128 x i8>*
+ %v21 = tail call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %v20, i32 32, <128 x i1> , <128 x i8> undef), !tbaa !8
+ %v22 = shufflevector <128 x i8> %v21, <128 x i8> poison, <32 x i32> 
+ %v23 = icmp ugt <32 x i8> %v16, %v22
+ %v24 = select <32 x i1> %v23, <32 x i8> %v16, <32 x i8> %v22
+ %v25 = shufflevector <32 x i8> %v24, <32 x i8> poison, <128 x i32> 
+ tail call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %v25, <128 x i8>* %v3, i32 32, <128 x i1> ), !tbaa !3
+ ret i32 0
+}
+
+declare void @llvm.masked.store.v128i8.p0v128i8(<128 x i8>, <128 x i8>*, i32 immarg, <128 x i1>) #1
+declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32 immarg, <128 x i1>, <128 x i8>) #2
+
+attributes #0 = { nounwind willreturn }
+attributes #1 = { argmemonly nounwind willreturn }
+attributes #2 = { argmemonly nounwind readonly willreturn }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"0x2cf74d0", !2, i64 0}
+!2 = !{!"tvm-tbaa"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"i8", !5, i64 0}
+!5 = !{!"0x2c6ebb0", !2, i64 0}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"0x2cff870", !2, i64 0}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"i8", !10, i64 0}
+!10 = !{!"0x2c6c3c0", !2, i64 0}
diff --git a/llvm/test/Transforms/Inline/inlined-loop-metadata-inseltpoison.ll b/llvm/test/Transforms/Inline/inlined-loop-metadata-inseltpoison.ll
new file mode 100755
index 0000000..60c2f42
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inlined-loop-metadata-inseltpoison.ll
@@ -0,0 +1,159 @@
+; This test checks that the !llvm.loop metadata has been updated after inlining
+; so that the start and end locations refer to the inlined DILocations.
+
+; RUN: opt -inline -always-inline %s -S 2>&1 | FileCheck %s
+; CHECK: br i1 %{{.*}}, label %middle.block.i, label %vector.body.i, !dbg !{{[0-9]+}}, !llvm.loop [[VECTOR:![0-9]+]]
+; CHECK: br i1 %{{.*}}, label %for.cond.cleanup.loopexit.i, label %for.body.i, !dbg !{{[0-9]+}}, !llvm.loop [[SCALAR:![0-9]+]]
+; CHECK-DAG: [[VECTOR]] = distinct !{[[VECTOR]], [[START:![0-9]+]], [[END:![0-9]+]], [[IS_VECTORIZED:![0-9]+]]}
+; CHECK-DAG: [[SCALAR]] = distinct !{[[SCALAR]], [[START]], [[END]], [[NO_UNROLL:![0-9]+]], [[IS_VECTORIZED]]}
+; CHECK-DAG: [[IS_VECTORIZED]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-DAG: [[NO_UNROLL]] = !{!"llvm.loop.unroll.runtime.disable"}
+
+; This IR can be generated by running:
+; clang -emit-llvm -S -gmlt -O2 inlined.cpp -o - -mllvm -opt-bisect-limit=53 |\
+; opt -loop-vectorize
+;
+; Where inlined.cpp contains:
+; extern int *Array;
+; static int bar(unsigned x)
+; {
+; int Ret = 0;
+; for (unsigned i = 0; i < x; ++i)
+; {
+; Ret += Array[i] * i;
+; }
+; return Ret;
+; }
+;
+; int foo(unsigned x)
+; {
+; int Bar = bar(x);
+; return Bar;
+; }
+
+@"?Array@@3PEAHEA" = external dso_local local_unnamed_addr global i32*, align 8
+
+define dso_local i32 @"?foo@@YAHI@Z"(i32 %x) local_unnamed_addr !dbg !8 {
+entry:
+ %call = call fastcc i32 @"?bar@@YAHI@Z"(i32 %x), !dbg !10
+ ret i32 %call, !dbg !11
+}
+
+define internal fastcc i32 @"?bar@@YAHI@Z"(i32 %x) unnamed_addr !dbg !12 {
+entry:
+ %cmp7 = icmp eq i32 %x, 0, !dbg !13
+ br i1 %cmp7, label %for.cond.cleanup, label %for.body.lr.ph, !dbg !13
+
+for.body.lr.ph: ; preds = %entry
+ %0 = load i32*, i32** @"?Array@@3PEAHEA", align 8, !dbg !14, !tbaa !15
+ %wide.trip.count = zext i32 %x to i64, !dbg !14
+ %min.iters.check = icmp ult i64 %wide.trip.count, 8, !dbg !13
+ br i1 %min.iters.check, label %scalar.ph, label %vector.ph, !dbg !13
+
+vector.ph: ; preds = %for.body.lr.ph
+ %n.mod.vf = urem i64 %wide.trip.count, 8, !dbg !13
+ %n.vec = sub i64 %wide.trip.count, %n.mod.vf, !dbg !13
+ br label %vector.body, !dbg !13
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ], !dbg !13
+ %vec.ind = phi <4 x i64> [ , %vector.ph ], [ %vec.ind.next, %vector.body ]
+ %vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %17, %vector.body ]
+ %vec.phi2 = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %18, %vector.body ]
+ %vec.ind4 = phi <4 x i32> [ , %vector.ph ], [ %vec.ind.next7, %vector.body ], !dbg !19
+ %step.add = add <4 x i64> %vec.ind, 
+ %1 = add i64 %index, 0, !dbg !13
+ %2 = add i64 %index, 1, !dbg !13
+ %3 = add i64 %index, 2, !dbg !13
+ %4 = add i64 %index, 3, !dbg !13
+ %5 = add i64 %index, 4, !dbg !13
+ %6 = add i64 %index, 5, !dbg !13
+ %7 = add i64 %index, 6, !dbg !13
+ %8 = add i64 %index, 7, !dbg !13
+ %9 = getelementptr inbounds i32, i32* %0, i64 %1, !dbg !19
+ %10 = getelementptr inbounds i32, i32* %0, i64 %5, !dbg !19
+ %11 = getelementptr inbounds i32, i32* %9, i32 0, !dbg !19
+ %12 = bitcast i32* %11 to <4 x i32>*, !dbg !19
+ %wide.load = load <4 x i32>, <4 x i32>* %12, align 4, !dbg !19, !tbaa !20
+ %13 = getelementptr inbounds i32, i32* %9, i32 4, !dbg !19
+ %14 = bitcast i32* %13 to <4 x i32>*, !dbg !19
+ %wide.load3 = load <4 x i32>, <4 x i32>* %14, align 4, !dbg !19, !tbaa !20
+ %step.add5 = add <4 x i32> %vec.ind4, , !dbg !19
+ %15 = mul <4 x i32> %wide.load, %vec.ind4, !dbg !19
+ %16 = mul <4 x i32> %wide.load3, %step.add5, !dbg !19
+ %17 = add <4 x i32> %15, %vec.phi, !dbg !19
+ %18 = add <4 x i32> %16, %vec.phi2, !dbg !19
+ %index.next = add i64 %index, 8, !dbg !13
+ %vec.ind.next = add <4 x i64> %step.add, 
+ %vec.ind.next7 = add <4 x i32> %step.add5, , !dbg !19
+ %19 = icmp eq i64 %index.next, %n.vec, !dbg !13
+ br i1 %19, label %middle.block, label %vector.body, !dbg !13, !llvm.loop !22
+
+middle.block: ; preds = %vector.body
+ %bin.rdx = add <4 x i32> %18, %17, !dbg !19
+ %rdx.shuf = shufflevector <4 x i32> %bin.rdx, <4 x i32> poison, <4 x i32> , !dbg !19
+ %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf, !dbg !19
+ %rdx.shuf9 = shufflevector <4 x i32> %bin.rdx8, <4 x i32> poison, <4 x i32> , !dbg !19
+ %bin.rdx10 = add <4 x i32> %bin.rdx8, %rdx.shuf9, !dbg !19
+ %20 = extractelement <4 x i32> %bin.rdx10, i32 0, !dbg !19
+ %cmp.n = icmp eq i64 %wide.trip.count, %n.vec, !dbg !13
+ br i1 %cmp.n, label %for.cond.cleanup.loopexit, label %scalar.ph, !dbg !13
+
+scalar.ph: ; preds = %middle.block, %for.body.lr.ph
+ %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %for.body.lr.ph ]
+ %bc.merge.rdx = phi i32 [ 0, %for.body.lr.ph ], [ %20, %middle.block ]
+ br label %for.body, !dbg !13
+
+for.cond.cleanup.loopexit: ; preds = %middle.block, %for.body
+ %add.lcssa = phi i32 [ %add, %for.body ], [ %20, %middle.block ], !dbg !19
+ br label %for.cond.cleanup, !dbg !25
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ %Ret.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.cond.cleanup.loopexit ], !dbg !14
+ ret i32 %Ret.0.lcssa, !dbg !25
+
+for.body: ; preds = %for.body, %scalar.ph
+ %indvars.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %indvars.iv.next, %for.body ]
+ %Ret.08 = phi i32 [ %bc.merge.rdx, %scalar.ph ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %0, i64 %indvars.iv, !dbg !19
+ %21 = load i32, i32* %arrayidx, align 4, !dbg !19, !tbaa !20
+ %22 = trunc i64 %indvars.iv to i32, !dbg !19
+ %mul = mul i32 %21, %22, !dbg !19
+ %add = add i32 %mul, %Ret.08, !dbg !19
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !13
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !13
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !13, !llvm.loop !26
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0 (https://github.com/llvm/llvm-project.git b1e28d9b6a16380ccf1456fe0695f639364407a9)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "inlined.cpp", directory: "")
+!2 = !{}
+!3 = !{i32 2, !"CodeView", i32 1}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 2}
+!6 = !{i32 7, !"PIC Level", i32 2}
+!7 = !{!"clang version 9.0.0 (https://github.com/llvm/llvm-project.git b1e28d9b6a16380ccf1456fe0695f639364407a9)"}
+!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 13, type: !9, scopeLine: 14, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!9 = !DISubroutineType(types: !2)
+!10 = !DILocation(line: 15, scope: !8)
+!11 = !DILocation(line: 16, scope: !8)
+!12 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !9, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
+!13 = !DILocation(line: 6, scope: !12)
+!14 = !DILocation(line: 0, scope: !12)
+!15 = !{!16, !16, i64 0}
+!16 = !{!"any pointer", !17, i64 0}
+!17 = !{!"omnipotent char", !18, i64 0}
+!18 = !{!"Simple C++ TBAA"}
+!19 = !DILocation(line: 8, scope: !12)
+!20 = !{!21, !21, i64 0}
+!21 = !{!"int", !17, i64 0}
+!22 = distinct !{!22, !13, !23, !24}
+!23 = !DILocation(line: 9, scope: !12)
+!24 = !{!"llvm.loop.isvectorized", i32 1}
+!25 = !DILocation(line: 10, scope: !12)
+!26 = distinct !{!26, !13, !23, !27, !24}
+!27 = !{!"llvm.loop.unroll.runtime.disable"}
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll
index 8363ed4..a8b4601 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll
@@ -99,7 +99,7 @@ define amdgpu_ps float @extract_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i3
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
 %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
- %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> 
+ %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> 
 ret <2 x float> %shuf
 }
@@ -109,17 +109,17 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v4f32(<4 x i32> inre
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 {
 %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false)
- %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> 
+ %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> 
 ret <2 x float> %shuf
 }

 ; CHECK-LABEL: @extract_elt2_elt3_buffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 
%ofs, i1 false, i1 false) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -128,27 +128,27 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_buffer_load_v4f32(<4 x i32> inre ; CHECK-NEXT: ret <3 x float> %data define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_elt3_buffer_load_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_buffer_load_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -199,7 +199,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_3(<4 x i3 %elt2 = extractelement <4 x float> %data, i32 2 %ins0 = insertelement <2 x float> poison, float %elt0, i32 0 %ins1 = insertelement <2 x float> %ins0, float %elt2, i32 1 - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> %ret = fadd <2 x float> %ins1, %shuf ret <2 x float> %ret } @@ -270,17 +270,17 @@ define amdgpu_ps float @extract_elt2_buffer_load_v3f32(<4 x i32> inreg %rsrc, i3 ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 
%idx, i32 %ofs, i1 false, i1 false) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_buffer_load_v3f32( ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) -; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> +; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt1_elt2_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { %data = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -325,7 +325,7 @@ define amdgpu_ps float @extract_elt0_buffer_load_format_v2f32(<4 x i32> inreg %r ; CHECK-NEXT: ret <2 x float> %data define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { %data = call <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -334,7 +334,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v3f32(<4 x i3 ; CHECK-NEXT: ret <2 x float> %data define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -480,7 +480,7 @@ define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -490,7 +490,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> ; CHECK-NEXT: ret <2 x float> %data define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -500,7 +500,7 @@ define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> ; CHECK-NEXT: ret <2 x float> %data define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> 
undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -509,7 +509,7 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> ; CHECK-NEXT: ret <3 x float> %data define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -519,17 +519,17 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x ; CHECK-NEXT: ret <3 x float> %data define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -567,7 +567,7 @@ define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -577,7 +577,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> ; CHECK-NEXT: ret <2 x float> %data define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -674,7 +674,7 @@ define amdgpu_ps half @extract_elt3_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, ; CHECK-NEXT: ret <2 x half> define amdgpu_ps <2 x half> @extract_elt0_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x half> %data, <4 x half> undef, <2 x i32> + %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> ret <2 x half> %shuf } @@ -737,7 
+737,7 @@ define amdgpu_ps i8 @extract_elt3_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i3 ; CHECK-NEXT: ret <2 x i8> define amdgpu_ps <2 x i8> @extract_elt0_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x i8> %data, <4 x i8> undef, <2 x i32> + %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> ret <2 x i8> %shuf } @@ -837,7 +837,7 @@ define amdgpu_ps float @extract_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -847,7 +847,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v4f32(<4 x i32> in ; CHECK-NEXT: ret <2 x float> %data define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -857,7 +857,7 @@ define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v4f32(<4 x i32> in ; CHECK-NEXT: ret <2 x float> %data define amdgpu_ps <2 x float> @extract_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -866,17 +866,17 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_s_buffer_load_v4f32(<4 x i32> in ; CHECK-NEXT: ret <3 x float> %data define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_s_buffer_load_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -914,7 +914,7 @@ define amdgpu_ps float @extract_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> 
%rsrc, i32 %ofs, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -924,7 +924,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v3f32(<4 x i32> in ; CHECK-NEXT: ret <2 x float> %data define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -932,11 +932,11 @@ define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v3f32(<4 x i32> in ; to vec4 anyway during lowering. ; CHECK-LABEL: @extract_elt1_elt2_elt3_s_buffer_load_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -1032,7 +1032,7 @@ define amdgpu_ps half @extract_elt3_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i ; CHECK-NEXT: ret <2 x half> define amdgpu_ps <2 x half> @extract_elt0_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 { %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0) - %shuf = shufflevector <4 x half> %data, <4 x half> undef, <2 x i32> + %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> ret <2 x half> %shuf } @@ -1095,7 +1095,7 @@ define amdgpu_ps i8 @extract_elt3_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 ; CHECK-NEXT: ret <2 x i8> define amdgpu_ps <2 x i8> @extract_elt0_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 { %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0) - %shuf = shufflevector <4 x i8> %data, <4 x i8> undef, <2 x i32> + %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> ret <2 x i8> %shuf } @@ -1203,7 +1203,7 @@ define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inre ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -1213,17 +1213,17 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = 
shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -1232,27 +1232,27 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 ; CHECK-NEXT: ret <3 x float> %data define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -1290,17 +1290,17 @@ define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inre ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> 
%data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32( ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) -; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> +; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -1438,7 +1438,7 @@ define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %r ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -1448,7 +1448,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i3 ; CHECK-NEXT: ret <2 x float> %data define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -1458,7 +1458,7 @@ define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i3 ; CHECK-NEXT: ret <2 x float> %data define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -1467,7 +1467,7 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i3 ; CHECK-NEXT: ret <3 x float> %data define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -1477,17 +1477,17 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 ; CHECK-NEXT: ret <3 x float> %data define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, 
<3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -1525,7 +1525,7 @@ define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %r ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -1535,7 +1535,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i3 ; CHECK-NEXT: ret <2 x float> %data define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -1632,7 +1632,7 @@ define amdgpu_ps half @extract_elt3_struct_buffer_load_v4f16(<4 x i32> inreg %rs ; CHECK-NEXT: ret <2 x half> define amdgpu_ps <2 x half> @extract_elt0_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x half> %data, <4 x half> undef, <2 x i32> + %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> ret <2 x half> %shuf } @@ -1695,7 +1695,7 @@ define amdgpu_ps i8 @extract_elt3_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, ; CHECK-NEXT: ret <2 x i8> define amdgpu_ps <2 x i8> @extract_elt0_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x i8> %data, <4 x i8> undef, <2 x i32> + %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> ret <2 x i8> %shuf } @@ -1803,7 +1803,7 @@ define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> i ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, 
<4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -1813,17 +1813,17 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32( ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -1832,27 +1832,27 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32( ; CHECK-NEXT: ret <3 x float> %data define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = 
call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -1890,17 +1890,17 @@ define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> i ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32( ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) -; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> +; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -2023,7 +2023,7 @@ define amdgpu_ps float @extract_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsr ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -2033,17 +2033,17 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v4f32(<4 x i32> ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt2_elt3_raw_tbuffer_load_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x 
float> poison, <2 x i32> ret <2 x float> %shuf } @@ -2052,27 +2052,27 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> ; CHECK-NEXT: ret <3 x float> %data define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -2110,17 +2110,17 @@ define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsr ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v3f32( ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) -; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> +; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, 
<2 x i32> ret <2 x float> %shuf } @@ -2286,7 +2286,7 @@ define amdgpu_ps float @extract_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg % ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -2296,17 +2296,17 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v4f32(<4 x i ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt2_elt3_struct_tbuffer_load_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -2315,27 +2315,27 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i ; CHECK-NEXT: ret <3 x float> %data define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32( ; 
CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) -; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> +; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -2373,17 +2373,17 @@ define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg % ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v3f32( ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) -; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> +; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) - %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> + %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -2505,7 +2505,7 @@ define amdgpu_ps float @extract_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i ; CHECK-NEXT: ret <2 x float> define amdgpu_ps <2 x float> @extract_elt0_elt1_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -2515,17 +2515,17 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_tbuffer_load_v4f32(<4 x i32> inr ; CHECK-NEXT: ret <2 x float> %shuf define amdgpu_ps <2 x float> @extract_elt1_elt2_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 { %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } ; CHECK-LABEL: @extract_elt2_elt3_tbuffer_load_v4f32( ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, 
i1 false)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3>
   ret <2 x float> %shuf
 }
 
@@ -2534,27 +2534,27 @@ define amdgpu_ps <2 x float> @extract_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inr
 ; CHECK-NEXT: ret <3 x float> %data
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_elt3_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt0_elt2_elt3_tbuffer_load_v4f32(
 ; CHECK-NEXT: %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+; CHECK-NEXT: %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
 ; CHECK-NEXT: ret <3 x float> %shuf
 define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> <i32 0, i32 2, i32 3>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3>
   ret <3 x float> %shuf
 }
 
@@ -2592,17 +2592,17 @@ define amdgpu_ps float @extract_elt2_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i
 ; CHECK-NEXT: ret <2 x float>
 define amdgpu_ps <2 x float> @extract_elt0_elt1_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
 ; CHECK-LABEL: @extract_elt1_elt2_tbuffer_load_v3f32(
 ; CHECK-NEXT: %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT: %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT: ret <2 x float> %shuf
 define amdgpu_ps <2 x float> @extract_elt1_elt2_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1) #0 {
   %data = call <3 x float> @llvm.amdgcn.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 0, i32 0, i32 14, i32 4, i1 false, i1 false)
-  %shuf = shufflevector <3 x float> %data, <3 x float> undef, <2 x i32> <i32 1, i32 2>
+  %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %shuf
 }
 
@@ -2755,7 +2755,7 @@ define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float
 ; CHECK-NEXT: ret <2 x float> %1
 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -2764,7 +2764,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -2773,7 +2773,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -2782,7 +2782,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32
 ; CHECK-NEXT: ret <2 x float> %data
 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
-  %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %shuf
 }
 
@@ -2792,7 +2792,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32
 ; CHECK-NEXT: ret <3 x float> %1
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
   %data = call <4 x float>
@llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -2802,7 +2802,7 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_ ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -2812,7 +2812,7 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_ ; CHECK-NEXT: ret <3 x float> %shuf define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -2821,7 +2821,7 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_ ; CHECK-NEXT: ret <3 x float> %data define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -2830,7 +2830,7 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_ ; CHECK-NEXT: ret <3 x float> %data define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -2924,7 +2924,7 @@ declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, flo ; CHECK-NEXT: ret <2 x float> %data define amdgpu_ps <2 x float> @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -2939,7 +2939,7 @@ declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, ; CHECK-NEXT: ret <2 x float> %data define amdgpu_ps <2 x float> 
@extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <2 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> ret <2 x float> %shuf } @@ -2954,7 +2954,7 @@ declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i ; CHECK-NEXT: ret <3 x float> %data define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %shuf = shufflevector <4 x float> %data, <4 x float> undef, <3 x i32> + %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> ret <3 x float> %shuf } @@ -2997,7 +2997,7 @@ define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %d ; CHECK-NEXT: ret <4 x half> %res define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %res = shufflevector <4 x half> %data, <4 x half> undef, <4 x i32> + %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> ret <4 x half> %res } @@ -3007,17 +3007,17 @@ define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32 ; CHECK-NEXT: ret <4 x half> %res define amdgpu_ps <4 x half> @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %res = shufflevector <4 x half> %data, <4 x half> undef, <4 x i32> + %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> ret <4 x half> %res } ; CHECK-LABEL: @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32( ; CHECK-NEXT: %data = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32(i32 1, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) -; CHECK-NEXT: %res = insertelement <4 x half> undef, half %data, i64 0 +; CHECK-NEXT: %res = insertelement <4 x half> poison, half %data, i64 0 ; CHECK-NEXT: ret <4 x half> %res define amdgpu_ps <4 x half> @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) - %res = shufflevector <4 x half> %data, <4 x half> undef, <4 x i32> + %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> ret <4 x half> %res } diff --git a/llvm/test/Transforms/InstCombine/X86/shufflemask-undef-inseltpoison.ll 
b/llvm/test/Transforms/InstCombine/X86/shufflemask-undef-inseltpoison.ll new file mode 100644 index 0000000..95b453a --- /dev/null +++ b/llvm/test/Transforms/InstCombine/X86/shufflemask-undef-inseltpoison.ll @@ -0,0 +1,110 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +; CHECK-NOT: shufflevector{{.*}}i32 8" + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9" + %struct.ActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 } + %struct.AlphaTest = type { float, i16, i8, i8 } + %struct.ArrayRange = type { i8, i8, i8, i8 } + %struct.BlendMode = type { i16, i16, i16, i16, %struct.IColor4, i16, i16, i8, i8, i8, i8 } + %struct.ClearColor = type { double, %struct.IColor4, %struct.IColor4, float, i32 } + %struct.ClipPlane = type { i32, [6 x %struct.IColor4] } + %struct.ColorBuffer = type { i16, i8, i8, [8 x i16], [0 x i32] } + %struct.ColorMatrix = type { [16 x float]*, %struct.ImagingColorScale } + %struct.Convolution = type { %struct.IColor4, %struct.ImagingColorScale, i16, i16, [0 x i32], float*, i32, i32 } + %struct.DepthTest = type { i16, i16, i8, i8, i8, i8, double, double } + %struct.FixedFunction = type { %struct.PPStreamToken* } + %struct.FogMode = type { %struct.IColor4, float, float, float, float, float, i16, i16, i16, i8, i8 } + %struct.HintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 } + %struct.Histogram = type { %struct.ProgramLimits*, i32, i16, i8, i8 } + %struct.ImagingColorScale = type { %struct.TCoord2, %struct.TCoord2, %struct.TCoord2, %struct.TCoord2 } + %struct.ImagingSubset = type { %struct.Convolution, %struct.Convolution, %struct.Convolution, %struct.ColorMatrix, %struct.Minmax, %struct.Histogram, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, i32, [0 x i32] } + %struct.Light = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.PointLineLimits, float, float, float, float, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, float, float, float, float } + %struct.LightModel = type { %struct.IColor4, [8 x %struct.Light], [2 x %struct.Material], i32, i16, i16, i16, i8, i8, i8, i8, i8, i8 } + %struct.LightProduct = type { %struct.IColor4, %struct.IColor4, %struct.IColor4 } + %struct.LineMode = type { float, i32, i16, i16, i8, i8, i8, i8 } + %struct.LogicOp = type { i16, i8, i8 } + %struct.MaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 } + %struct.Material = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, float, float, float, float, [8 x %struct.LightProduct], %struct.IColor4, [8 x i32] } + %struct.Minmax = type { %struct.MinmaxTable*, i16, i8, i8, [0 x i32] } + %struct.MinmaxTable = type { %struct.IColor4, %struct.IColor4 } + %struct.Mipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, float*, i8*, i16, i16, i16, i16, [2 x float] } + %struct.Multisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 } + %struct.PipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.IColor4* } + %struct.PixelMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.PixelMode = type { float, float, %struct.PixelStore, %struct.PixelTransfer, %struct.PixelMap, %struct.ImagingSubset, i32, i32 } + %struct.PixelPack = type { i32, i32, 
i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 } + %struct.PixelStore = type { %struct.PixelPack, %struct.PixelPack } + %struct.PixelTransfer = type { float, float, float, float, float, float, float, float, float, float, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float } + %struct.PluginBufferData = type { i32 } + %struct.PointLineLimits = type { float, float, float } + %struct.PointMode = type { float, float, float, float, %struct.PointLineLimits, float, i8, i8, i8, i8, i16, i16, i32, i16, i16 } + %struct.PolygonMode = type { [128 x i8], float, float, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 } + %struct.ProgramLimits = type { i32, i32, i32, i32 } + %struct.RegisterCombiners = type { i8, i8, i8, i8, i32, [2 x %struct.IColor4], [8 x %struct.RegisterCombinersPerStageState], %struct.RegisterCombinersFinalStageState } + %struct.RegisterCombinersFinalStageState = type { i8, i8, i8, i8, [7 x %struct.RegisterCombinersPerVariableState] } + %struct.RegisterCombinersPerPortionState = type { [4 x %struct.RegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 } + %struct.RegisterCombinersPerStageState = type { [2 x %struct.RegisterCombinersPerPortionState], [2 x %struct.IColor4] } + %struct.RegisterCombinersPerVariableState = type { i16, i16, i16, i16 } + %struct.SWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, [4 x i8*], i32 } + %struct.ScissorTest = type { %struct.ProgramLimits, i8, i8, i8, i8 } + %struct.State = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.IColor4], [128 x %struct.IColor4], %struct.Viewport, %struct.Transform, %struct.LightModel, %struct.ActiveTextureTargets, %struct.AlphaTest, %struct.BlendMode, %struct.ClearColor, %struct.ColorBuffer, %struct.DepthTest, %struct.ArrayRange, %struct.FogMode, %struct.HintMode, %struct.LineMode, %struct.LogicOp, %struct.MaskMode, %struct.PixelMode, %struct.PointMode, %struct.PolygonMode, %struct.ScissorTest, i32, %struct.StencilTest, [8 x %struct.TextureMode], [16 x %struct.TextureImageMode], %struct.ArrayRange, [8 x %struct.TextureCoordGen], %struct.ClipPlane, %struct.Multisample, %struct.RegisterCombiners, %struct.ArrayRange, %struct.ArrayRange, [3 x %struct.PipelineProgramState], %struct.ArrayRange, %struct.TransformFeedback, i32*, %struct.FixedFunction, [3 x i32], [3 x i32] }> + %struct.StencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] } + %struct.TextureCoordGen = type { { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, i8, i8, i8, i8 } + %struct.TextureGeomState = type { i16, i16, i16, i16, i16, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [6 x i16], [6 x i16] } + %struct.TextureImageMode = type { float } + %struct.TextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, i8* } + %struct.TextureMode = type { %struct.IColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float } + %struct.TextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.IColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, i8* } + %struct.TextureRec = type { [4 x float], %struct.TextureState*, %struct.Mipmaplevel*, %struct.Mipmaplevel*, float, 
float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] } + %struct.TextureState = type { i16, i8, i8, i16, i16, float, i32, %struct.SWRSurfaceRec*, %struct.TextureParamState, %struct.TextureGeomState, [0 x i32], i8*, i32, %struct.TextureLevel, [1 x [15 x %struct.TextureLevel]] } + %struct.Transform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }> + %struct.TransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] } + %struct.Viewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float } + %struct.IColor4 = type { float, float, float, float } + %struct.TCoord2 = type { float, float } + %struct.VMGPStack = type { [6 x <4 x float>*], <4 x float>*, i32, i32, <4 x float>*, <4 x float>**, i32, i32, i32, i32, i32, i32 } + %struct.VMTextures = type { [16 x %struct.TextureRec*] } + %struct.PPStreamToken = type { { i16, i16, i32 } } + %struct._VMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } } + +define i32 @foo(%struct.State* %dst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._VMConstants* %cnstn, %struct.PPStreamToken* %pstrm, %struct.PluginBufferData* %gpctx, %struct.VMTextures* %txtrs, %struct.VMGPStack* %gpstk) nounwind { +bb266.i: + getelementptr <4 x float>, <4 x float>* null, i32 11 ; <<4 x float>*>:0 [#uses=1] + load <4 x float>, <4 x float>* %0, align 16 ; <<4 x float>>:1 [#uses=1] + shufflevector <4 x float> %1, <4 x float> poison, <4 x i32> < i32 0, i32 1, i32 1, i32 1 > ; <<4 x float>>:2 [#uses=1] + shufflevector <4 x float> %2, <4 x float> poison, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:3 [#uses=1] + shufflevector <4 x float> undef, <4 x float> poison, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:4 [#uses=1] + shufflevector <4 x float> %4, <4 x float> %3, <4 x i32> < i32 6, i32 7, i32 2, i32 3 > ; <<4 x float>>:5 [#uses=1] + fmul <4 x float> %5, zeroinitializer ; <<4 x float>>:6 [#uses=2] + fmul <4 x float> %6, %6 ; <<4 x float>>:7 [#uses=1] + fadd <4 x float> zeroinitializer, %7 ; <<4 x float>>:8 [#uses=1] + call <4 x float> @llvm.x86.sse.max.ps( <4 x float> zeroinitializer, <4 x float> %8 ) nounwind readnone ; <<4 x float>>:9 [#uses=1] + %phitmp40 = bitcast <4 x float> %9 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp4109.i = and <4 x i32> %phitmp40, < i32 8388607, i32 8388607, i32 8388607, i32 8388607 > ; <<4 x i32>> [#uses=1] + %tmp4116.i = or <4 x i32> %tmp4109.i, < i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216 > ; <<4 x i32>> [#uses=1] + %tmp4117.i = bitcast <4 x i32> %tmp4116.i to <4 x float> ; <<4 x float>> [#uses=1] + fadd <4 x float> %tmp4117.i, zeroinitializer ; <<4 x float>>:10 [#uses=1] + fmul <4 x float> %10, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 > ; <<4 x float>>:11 [#uses=1] + call <4 x float> @llvm.x86.sse.max.ps( <4 x float> %11, <4 x float> zeroinitializer ) nounwind readnone ; <<4 x float>>:12 [#uses=1] + call <4 x float> @llvm.x86.sse.min.ps( <4 x float> %12, <4 x float> 
zeroinitializer ) nounwind readnone ; <<4 x float>>:13 [#uses=1] + %tmp4170.i = call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %13, <4 x float> zeroinitializer, i8 2 ) nounwind ; <<4 x float>> [#uses=1] + bitcast <4 x float> %tmp4170.i to <16 x i8> ; <<16 x i8>>:14 [#uses=1] + call i32 @llvm.x86.sse2.pmovmskb.128( <16 x i8> %14 ) nounwind readnone ; :15 [#uses=1] + icmp eq i32 %15, 0 ; :16 [#uses=1] + br i1 %16, label %bb5574.i, label %bb4521.i + +bb4521.i: ; preds = %bb266.i + unreachable + +bb5574.i: ; preds = %bb266.i + unreachable +} + +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone + +declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone + +declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone + +declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone diff --git a/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll index a4cf135..281c69a 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll @@ -13,13 +13,13 @@ declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8 immarg define double @elts_addsub_v2f64(<2 x double> %0, <2 x double> %1) { ; CHECK-LABEL: @elts_addsub_v2f64( -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1:%.*]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1:%.*]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP0:%.*]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 ; CHECK-NEXT: ret double [[TMP5]] ; - %3 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> - %4 = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> + %3 = shufflevector <2 x double> %0, <2 x double> poison, <2 x i32> + %4 = shufflevector <2 x double> %1, <2 x double> poison, <2 x i32> %5 = tail call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %3, <2 x double> %4) %6 = extractelement <2 x double> %5, i32 0 ret double %6 @@ -31,8 +31,8 @@ define double @elts_addsub_v2f64_sub(<2 x double> %0, <2 x double> %1) { ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 ; CHECK-NEXT: ret double [[TMP4]] ; - %3 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> - %4 = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> + %3 = shufflevector <2 x double> %0, <2 x double> poison, <2 x i32> + %4 = shufflevector <2 x double> %1, <2 x double> poison, <2 x i32> %5 = tail call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %3, <2 x double> %4) %6 = extractelement <2 x double> %5, i32 0 ret double %6 @@ -46,8 +46,8 @@ define float @elts_addsub_v4f32(<4 x float> %0, <4 x float> %1) { ; CHECK-NEXT: [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]] ; CHECK-NEXT: ret float [[TMP6]] ; - %3 = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> - %4 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> + %3 = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> + %4 = shufflevector <4 x float> %1, <4 x float> poison, <4 x i32> %5 = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %3, <4 x float> %4) %6 = extractelement <4 x float> %5, i32 0 %7 = extractelement <4 x float> %5, i32 1 @@ -63,8 +63,8 @@ define float @elts_addsub_v4f32_add(<4 x float> %0, <4 x float> %1) { ; CHECK-NEXT: [[TMP6:%.*]] = 
fadd float [[TMP4]], [[TMP5]] ; CHECK-NEXT: ret float [[TMP6]] ; - %3 = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> - %4 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> + %3 = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> + %4 = shufflevector <4 x float> %1, <4 x float> poison, <4 x i32> %5 = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %3, <4 x float> %4) %6 = extractelement <4 x float> %5, i32 1 %7 = extractelement <4 x float> %5, i32 3 @@ -80,8 +80,8 @@ define double @elts_addsub_v4f64(<4 x double> %0, <4 x double> %1) { ; CHECK-NEXT: [[TMP6:%.*]] = fadd double [[TMP4]], [[TMP5]] ; CHECK-NEXT: ret double [[TMP6]] ; - %3 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32> - %4 = shufflevector <4 x double> %1, <4 x double> undef, <4 x i32> + %3 = shufflevector <4 x double> %0, <4 x double> poison, <4 x i32> + %4 = shufflevector <4 x double> %1, <4 x double> poison, <4 x i32> %5 = tail call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %3, <4 x double> %4) %6 = extractelement <4 x double> %5, i32 0 %7 = extractelement <4 x double> %5, i32 1 @@ -97,8 +97,8 @@ define double @elts_addsub_v4f64_add(<4 x double> %0, <4 x double> %1) { ; CHECK-NEXT: [[TMP6:%.*]] = fadd double [[TMP4]], [[TMP5]] ; CHECK-NEXT: ret double [[TMP6]] ; - %3 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32> - %4 = shufflevector <4 x double> %1, <4 x double> undef, <4 x i32> + %3 = shufflevector <4 x double> %0, <4 x double> poison, <4 x i32> + %4 = shufflevector <4 x double> %1, <4 x double> poison, <4 x i32> %5 = tail call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %3, <4 x double> %4) %6 = extractelement <4 x double> %5, i32 1 %7 = extractelement <4 x double> %5, i32 3 @@ -114,8 +114,8 @@ define float @elts_addsub_v8f32(<8 x float> %0, <8 x float> %1) { ; CHECK-NEXT: [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]] ; CHECK-NEXT: ret float [[TMP6]] ; - %3 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> - %4 = shufflevector <8 x float> %1, <8 x float> undef, <8 x i32> + %3 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32> + %4 = shufflevector <8 x float> %1, <8 x float> poison, <8 x i32> %5 = tail call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %3, <8 x float> %4) %6 = extractelement <8 x float> %5, i32 0 %7 = extractelement <8 x float> %5, i32 1 @@ -131,8 +131,8 @@ define float @elts_addsub_v8f32_sub(<8 x float> %0, <8 x float> %1) { ; CHECK-NEXT: [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]] ; CHECK-NEXT: ret float [[TMP6]] ; - %3 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> - %4 = shufflevector <8 x float> %1, <8 x float> undef, <8 x i32> + %3 = shufflevector <8 x float> %0, <8 x float> poison, <8 x i32> + %4 = shufflevector <8 x float> %1, <8 x float> poison, <8 x i32> %5 = tail call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %3, <8 x float> %4) %6 = extractelement <8 x float> %5, i32 0 %7 = extractelement <8 x float> %5, i32 4 @@ -181,13 +181,13 @@ define double @PR48476_fsub(<2 x double> %x) { define double @PR48476_fadd_fsub(<2 x double> %x) { ; CHECK-LABEL: @PR48476_fadd_fsub( ; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[X:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[S]], [[X]] ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 ; 
CHECK-NEXT: ret double [[VECEXT]] ; %t1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> zeroinitializer, <2 x double> %x) - %s = shufflevector <2 x double> %t1, <2 x double> undef, <2 x i32> + %s = shufflevector <2 x double> %t1, <2 x double> poison, <2 x i32> %t2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %s, <2 x double> %x) %vecext = extractelement <2 x double> %t2, i32 0 ret double %vecext diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx2-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx2-inseltpoison.ll new file mode 100644 index 0000000..2ffe846 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/X86/x86-avx2-inseltpoison.ll @@ -0,0 +1,110 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Verify that instcombine is able to fold identity shuffles. + +define <8 x i32> @identity_test_vpermd(<8 x i32> %a0) { +; CHECK-LABEL: @identity_test_vpermd( +; CHECK-NEXT: ret <8 x i32> [[A0:%.*]] +; + %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) + ret <8 x i32> %a +} + +define <8 x float> @identity_test_vpermps(<8 x float> %a0) { +; CHECK-LABEL: @identity_test_vpermps( +; CHECK-NEXT: ret <8 x float> [[A0:%.*]] +; + %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) + ret <8 x float> %a +} + +; Instcombine should be able to fold the following shuffle to a builtin shufflevector +; with a shuffle mask of all zeroes. + +define <8 x i32> @zero_test_vpermd(<8 x i32> %a0) { +; CHECK-LABEL: @zero_test_vpermd( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; + %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer) + ret <8 x i32> %a +} + +define <8 x float> @zero_test_vpermps(<8 x float> %a0) { +; CHECK-LABEL: @zero_test_vpermps( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: ret <8 x float> [[TMP1]] +; + %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer) + ret <8 x float> %a +} + +; Verify that instcombine is able to fold constant shuffles. + +define <8 x i32> @shuffle_test_vpermd(<8 x i32> %a0) { +; CHECK-LABEL: @shuffle_test_vpermd( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; + %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) + ret <8 x i32> %a +} + +define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) { +; CHECK-LABEL: @shuffle_test_vpermps( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> +; CHECK-NEXT: ret <8 x float> [[TMP1]] +; + %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) + ret <8 x float> %a +} + +; Verify that instcombine is able to fold constant shuffles with undef mask elements. 
+ +define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) { +; CHECK-LABEL: @undef_test_vpermd( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; + %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) + ret <8 x i32> %a +} + +define <8 x float> @undef_test_vpermps(<8 x float> %a0) { +; CHECK-LABEL: @undef_test_vpermps( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> +; CHECK-NEXT: ret <8 x float> [[TMP1]] +; + %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) + ret <8 x float> %a +} + +; Verify simplify demanded elts. + +define <8 x i32> @elts_test_vpermd(<8 x i32> %a0, i32 %a1) { +; CHECK-LABEL: @elts_test_vpermd( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; + %1 = insertelement <8 x i32> , i32 %a1, i32 0 + %2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %1) + %3 = shufflevector <8 x i32> %2, <8 x i32> poison, <8 x i32> + ret <8 x i32> %3 +} + +define <8 x float> @elts_test_vpermps(<8 x float> %a0, <8 x i32> %a1) { +; CHECK-LABEL: @elts_test_vpermps( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: ret <8 x float> [[TMP2]] +; + %1 = insertelement <8 x i32> %a1, i32 0, i32 7 + %2 = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %1) + %3 = shufflevector <8 x float> %2, <8 x float> poison, <8 x i32> zeroinitializer + ret <8 x float> %3 +} + +declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) +declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) diff --git a/llvm/test/Transforms/InstCombine/X86/x86-f16c-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-f16c-inseltpoison.ll new file mode 100644 index 0000000..bc0b6792 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/X86/x86-f16c-inseltpoison.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) +declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) + +; +; Vector Demanded Elts +; + +; Only bottom 4 elements required. +define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) { +; CHECK-LABEL: @demand_vcvtph2ps_128( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <4 x half> +; CHECK-NEXT: [[CVTPH2PS:%.*]] = fpext <4 x half> [[TMP2]] to <4 x float> +; CHECK-NEXT: ret <4 x float> [[CVTPH2PS]] +; + %1 = shufflevector <8 x i16> %A, <8 x i16> poison, <8 x i32> + %2 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1) + ret <4 x float> %2 +} + +; All 8 elements required. 
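+; The 128-bit form only converts the low four i16 lanes, which is why the
+; shuffle feeding it above can be narrowed to a <4 x i32> mask, while the
+; 256-bit form below converts all eight lanes and so keeps a full-width
+; shuffle. A sketch of the narrowing, assuming the original mask kept lanes
+; 0-3 in order:
+;   %t = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+;   %f = bitcast <4 x i16> %t to <4 x half>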
+define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) { +; CHECK-LABEL: @demand_vcvtph2ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <8 x half> +; CHECK-NEXT: [[CVTPH2PS:%.*]] = fpext <8 x half> [[TMP2]] to <8 x float> +; CHECK-NEXT: ret <8 x float> [[CVTPH2PS]] +; + %1 = shufflevector <8 x i16> %A, <8 x i16> poison, <8 x i32> + %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1) + ret <8 x float> %2 +} + +; +; Constant Folding +; + +define <4 x float> @fold_vcvtph2ps_128() { +; CHECK-LABEL: @fold_vcvtph2ps_128( +; CHECK-NEXT: ret <4 x float> +; + %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> ) + ret <4 x float> %1 +} + +define <8 x float> @fold_vcvtph2ps_256() { +; CHECK-LABEL: @fold_vcvtph2ps_256( +; CHECK-NEXT: ret <8 x float> +; + %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> ) + ret <8 x float> %1 +} + +define <4 x float> @fold_vcvtph2ps_128_zero() { +; CHECK-LABEL: @fold_vcvtph2ps_128_zero( +; CHECK-NEXT: ret <4 x float> zeroinitializer +; + %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> ) + ret <4 x float> %1 +} + +define <8 x float> @fold_vcvtph2ps_256_zero() { +; CHECK-LABEL: @fold_vcvtph2ps_256_zero( +; CHECK-NEXT: ret <8 x float> zeroinitializer +; + %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> ) + ret <8 x float> %1 +} diff --git a/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll new file mode 100644 index 0000000..d7a2c82 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll @@ -0,0 +1,281 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +; +; UNDEF Elts +; + +define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @undef_pmuludq_128( +; CHECK-NEXT: ret <2 x i64> zeroinitializer +; + %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef) + ret <2 x i64> %1 +} + +define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) { +; CHECK-LABEL: @undef_pmuludq_256( +; CHECK-NEXT: ret <4 x i64> zeroinitializer +; + %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef) + ret <4 x i64> %1 +} + +define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: @undef_pmuludq_512( +; CHECK-NEXT: ret <8 x i64> zeroinitializer +; + %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef) + ret <8 x i64> %1 +} + +define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @undef_pmuldq_128( +; CHECK-NEXT: ret <2 x i64> zeroinitializer +; + %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef) + ret <2 x i64> %1 +} + +define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) { +; CHECK-LABEL: @undef_pmuldq_256( +; CHECK-NEXT: ret <4 x i64> zeroinitializer +; + %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef) + ret <4 x i64> %1 +} + +define <8 x i64> @undef_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: @undef_pmuldq_512( +; CHECK-NEXT: ret <8 x i64> zeroinitializer +; + %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef) + ret <8 x i64> %1 +} + +define <2 x i64> @undef_zero_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @undef_zero_pmuludq_128( +; 
CHECK-NEXT: ret <2 x i64> zeroinitializer +; + %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> zeroinitializer) + ret <2 x i64> %1 +} + +define <4 x i64> @undef_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) { +; CHECK-LABEL: @undef_zero_pmuludq_256( +; CHECK-NEXT: ret <4 x i64> zeroinitializer +; + %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> undef) + ret <4 x i64> %1 +} + +define <8 x i64> @undef_zero_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: @undef_zero_pmuludq_512( +; CHECK-NEXT: ret <8 x i64> zeroinitializer +; + %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> zeroinitializer) + ret <8 x i64> %1 +} + +define <2 x i64> @undef_zero_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @undef_zero_pmuldq_128( +; CHECK-NEXT: ret <2 x i64> zeroinitializer +; + %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> zeroinitializer, <4 x i32> undef) + ret <2 x i64> %1 +} + +define <4 x i64> @undef_zero_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) { +; CHECK-LABEL: @undef_zero_pmuldq_256( +; CHECK-NEXT: ret <4 x i64> zeroinitializer +; + %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> zeroinitializer) + ret <4 x i64> %1 +} + +define <8 x i64> @undef_zero_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: @undef_zero_pmuldq_512( +; CHECK-NEXT: ret <8 x i64> zeroinitializer +; + %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> undef) + ret <8 x i64> %1 +} + +; +; Constant Folding +; + +define <2 x i64> @fold_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @fold_pmuludq_128( +; CHECK-NEXT: ret <2 x i64> +; + %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> , <4 x i32> ) + ret <2 x i64> %1 +} + +define <4 x i64> @fold_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) { +; CHECK-LABEL: @fold_pmuludq_256( +; CHECK-NEXT: ret <4 x i64> zeroinitializer +; + %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer) + ret <4 x i64> %1 +} + +define <8 x i64> @fold_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: @fold_pmuludq_512( +; CHECK-NEXT: ret <8 x i64> +; + %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> , <16 x i32> ) + ret <8 x i64> %1 +} + +define <2 x i64> @fold_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @fold_pmuldq_128( +; CHECK-NEXT: ret <2 x i64> +; + %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> , <4 x i32> ) + ret <2 x i64> %1 +} + +define <4 x i64> @fold_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) { +; CHECK-LABEL: @fold_pmuldq_256( +; CHECK-NEXT: ret <4 x i64> +; + %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> , <8 x i32> ) + ret <4 x i64> %1 +} + +define <8 x i64> @fold_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: @fold_pmuldq_512( +; CHECK-NEXT: ret <8 x i64> zeroinitializer +; + %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> ) + ret <8 x i64> %1 +} + +; +; PMULUDQ/PMULDQ - only the even elements (0, 2, 4, 6) of the vXi32 inputs are required. 
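+; pmuludq zero-extends lanes 0 and 2 (and 4, 6 for the wider forms) to i64
+; and multiplies, so the odd input lanes are never read. A sketch of the
+; scalarized form, assuming the elided AND constants are the low-32-bit
+; masks (i64 4294967295):
+;   %xlo = and <2 x i64> %x, <i64 4294967295, i64 4294967295>
+;   %ylo = and <2 x i64> %y, <i64 4294967295, i64 4294967295>
+;   %r   = mul <2 x i64> %xlo, %ylo
+; pmuldq is analogous but sign-extends instead, via the shl/ashr pairs seen
+; in the checks below.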
+; + +define <2 x i64> @test_demanded_elts_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @test_demanded_elts_pmuludq_128( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP6:%.*]] = and <2 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP7:%.*]] = mul <2 x i64> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP7]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: ret <2 x i64> [[TMP8]] +; + %1 = shufflevector <4 x i32> %a0, <4 x i32> poison, <4 x i32> + %2 = shufflevector <4 x i32> %a1, <4 x i32> poison, <4 x i32> + %3 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %1, <4 x i32> %2) + %4 = shufflevector <2 x i64> %3, <2 x i64> poison, <2 x i32> zeroinitializer + ret <2 x i64> %4 +} + +define <4 x i64> @test_demanded_elts_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) { +; CHECK-LABEL: @test_demanded_elts_pmuludq_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i64> [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret <4 x i64> [[TMP7]] +; + %1 = shufflevector <8 x i32> %a0, <8 x i32> poison, <8 x i32> + %2 = shufflevector <8 x i32> %a1, <8 x i32> poison, <8 x i32> + %3 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %1, <8 x i32> %2) + ret <4 x i64> %3 +} + +define <8 x i64> @test_demanded_elts_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: @test_demanded_elts_pmuludq_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <8 x i64> [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret <8 x i64> [[TMP7]] +; + %1 = shufflevector <16 x i32> %a0, <16 x i32> poison, <16 x i32> + %2 = shufflevector <16 x i32> %a1, <16 x i32> poison, <16 x i32> + %3 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %1, <16 x i32> %2) + ret <8 x i64> %3 +} + +define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @test_demanded_elts_pmuldq_128( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], +; 
CHECK-NEXT: [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <2 x i64> [[TMP6]], [[TMP8]] +; CHECK-NEXT: ret <2 x i64> [[TMP9]] +; + %1 = shufflevector <4 x i32> %a0, <4 x i32> poison, <4 x i32> + %2 = shufflevector <4 x i32> %a1, <4 x i32> poison, <4 x i32> + %3 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %1, <4 x i32> %2) + ret <2 x i64> %3 +} + +define <4 x i64> @test_demanded_elts_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) { +; CHECK-LABEL: @test_demanded_elts_pmuldq_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i64> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i64> [[TMP10]] +; + %1 = shufflevector <8 x i32> %a0, <8 x i32> poison, <8 x i32> + %2 = shufflevector <8 x i32> %a1, <8 x i32> poison, <8 x i32> + %3 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %1, <8 x i32> %2) + %4 = shufflevector <4 x i64> %3, <4 x i64> poison, <4 x i32> + ret <4 x i64> %4 +} + +define <8 x i64> @test_demanded_elts_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: @test_demanded_elts_pmuldq_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], +; CHECK-NEXT: [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], +; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <8 x i64> [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> +; CHECK-NEXT: ret <8 x i64> [[TMP10]] +; + %1 = shufflevector <16 x i32> %a0, <16 x i32> poison, <16 x i32> + %2 = shufflevector <16 x i32> %a1, <16 x i32> poison, <16 x i32> + %3 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %1, <16 x i32> %2) + %4 = shufflevector <8 x i64> %3, <8 x i64> poison, <8 x i32> + ret <8 x i64> %4 +} + +declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone + +declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone +declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone + +declare <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32>, <16 x i32>) nounwind readnone +declare <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32>, <16 x i32>) nounwind readnone diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll index 5f3b991..ff8842c 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll +++ 
b/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll @@ -208,26 +208,26 @@ define <64 x i8> @fold_packuswb_512() { define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_128( ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> undef) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; - %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> - %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> + %1 = shufflevector <4 x i32> %a0, <4 x i32> poison, <4 x i32> + %2 = shufflevector <4 x i32> %a1, <4 x i32> poison, <4 x i32> %3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2) - %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> + %4 = shufflevector <8 x i16> %3, <8 x i16> poison, <8 x i32> ret <8 x i16> %4 } define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @elts_packusdw_128( ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; %1 = insertelement <4 x i32> %a0, i32 0, i32 0 %2 = insertelement <4 x i32> %a1, i32 0, i32 3 %3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2) - %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> + %4 = shufflevector <8 x i16> %3, <8 x i16> poison, <8 x i32> ret <8 x i16> %4 } @@ -238,7 +238,7 @@ define <16 x i8> @elts_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { %1 = insertelement <8 x i16> %a0, i16 0, i32 0 %2 = insertelement <8 x i16> %a1, i16 0, i32 0 %3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2) - %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> + %4 = shufflevector <16 x i8> %3, <16 x i8> poison, <16 x i32> ret <16 x i8> %4 } @@ -249,34 +249,34 @@ define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { %1 = insertelement <8 x i16> poison, i16 0, i32 0 %2 = insertelement <8 x i16> poison, i16 0, i32 0 %3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2) - %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> + %4 = shufflevector <16 x i8> %3, <16 x i8> poison, <16 x i32> ret <16 x i8> %4 } define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_256( ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> undef) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i16> [[TMP2]] ; - %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> - %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> + %1 = shufflevector <8 x i32> %a0, <8 x i32> poison, <8 x i32> + %2 = shufflevector <8 x i32> %a1, <8 x i32> poison, <8 x i32> %3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2) - %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> + %4 = shufflevector <16 x i16> %3, <16 x i16> poison, <16 x i32> ret <16 x i16> %4 } define <16 x i16> 
@elts_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @elts_packusdw_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> poison, <8 x i32> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> undef, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; - %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> - %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> + %1 = shufflevector <8 x i32> %a0, <8 x i32> poison, <8 x i32> + %2 = shufflevector <8 x i32> %a1, <8 x i32> poison, <8 x i32> %3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2) - %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> + %4 = shufflevector <16 x i16> %3, <16 x i16> poison, <16 x i32> ret <16 x i16> %4 } @@ -287,7 +287,7 @@ define <32 x i8> @elts_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) { %1 = insertelement <16 x i16> %a0, i16 0, i32 0 %2 = insertelement <16 x i16> %a1, i16 0, i32 8 %3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2) - %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> + %4 = shufflevector <32 x i8> %3, <32 x i8> poison, <32 x i32> ret <32 x i8> %4 } @@ -298,34 +298,34 @@ define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) { %1 = insertelement <16 x i16> poison, i16 0, i32 1 %2 = insertelement <16 x i16> poison, i16 0, i32 0 %3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2) - %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> zeroinitializer + %4 = shufflevector <32 x i8> %3, <32 x i8> poison, <32 x i32> zeroinitializer ret <32 x i8> %4 } define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_512( ; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> undef) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> undef, <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> poison, <32 x i32> ; CHECK-NEXT: ret <32 x i16> [[TMP2]] ; - %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> - %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> + %1 = shufflevector <16 x i32> %a0, <16 x i32> poison, <16 x i32> + %2 = shufflevector <16 x i32> %a1, <16 x i32> poison, <16 x i32> %3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2) - %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> + %4 = shufflevector <32 x i16> %3, <32 x i16> poison, <32 x i32> ret <32 x i16> %4 } define <32 x i16> @elts_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @elts_packusdw_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> poison, <16 x i32> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> undef, <32 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> poison, <32 x i32> ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; - %1 = shufflevector 
<16 x i32> %a0, <16 x i32> undef, <16 x i32> - %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> + %1 = shufflevector <16 x i32> %a0, <16 x i32> poison, <16 x i32> + %2 = shufflevector <16 x i32> %a1, <16 x i32> poison, <16 x i32> %3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2) - %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> + %4 = shufflevector <32 x i16> %3, <32 x i16> poison, <32 x i32> ret <32 x i16> %4 } @@ -338,7 +338,7 @@ define <64 x i8> @elts_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) { %3 = insertelement <32 x i16> %1, i16 0, i32 16 %4 = insertelement <32 x i16> %2, i16 0, i32 24 %5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4) - %6 = shufflevector <64 x i8> %5, <64 x i8> undef, <64 x i32> + %6 = shufflevector <64 x i8> %5, <64 x i8> poison, <64 x i32> ret <64 x i8> %6 } @@ -349,7 +349,7 @@ define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) { %1 = insertelement <32 x i16> poison, i16 0, i32 1 %2 = insertelement <32 x i16> poison, i16 0, i32 0 %3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2) - %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer + %4 = shufflevector <64 x i8> %3, <64 x i8> poison, <64 x i32> zeroinitializer ret <64 x i8> %4 } diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll new file mode 100644 index 0000000..f5094f8 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll @@ -0,0 +1,515 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s + +; Verify that instcombine is able to fold identity shuffles. + +define <16 x i8> @identity_test(<16 x i8> %InVec) { +; CHECK-LABEL: @identity_test( +; CHECK-NEXT: ret <16 x i8> [[INVEC:%.*]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <32 x i8> @identity_test_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @identity_test_avx2( +; CHECK-NEXT: ret <32 x i8> [[INVEC:%.*]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <64 x i8> @identity_test_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @identity_test_avx512( +; CHECK-NEXT: ret <64 x i8> [[INVEC:%.*]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +; Verify that instcombine is able to fold byte shuffles with zero masks. 
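+; A mask byte with its sign bit set zeroes the corresponding result lane, so
+; a mask whose bytes are all negative (the elided constants below are assumed
+; to be splats of -128) folds the whole call to zero:
+;   pshufb(%InVec, splat(-128))  -->  zeroinitializer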
+ +define <16 x i8> @fold_to_zero_vector(<16 x i8> %InVec) { +; CHECK-LABEL: @fold_to_zero_vector( +; CHECK-NEXT: ret <16 x i8> zeroinitializer +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <32 x i8> @fold_to_zero_vector_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @fold_to_zero_vector_avx2( +; CHECK-NEXT: ret <32 x i8> zeroinitializer +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <64 x i8> @fold_to_zero_vector_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @fold_to_zero_vector_avx512( +; CHECK-NEXT: ret <64 x i8> zeroinitializer +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +; Instcombine should be able to fold the following byte shuffle to a builtin shufflevector +; with a shuffle mask of all zeroes. + +define <16 x i8> @splat_test(<16 x i8> %InVec) { +; CHECK-LABEL: @splat_test( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> zeroinitializer) + ret <16 x i8> %1 +} + +; In the test case below, elements in the low 128-bit lane of the result +; vector are equal to the lower byte of %InVec (shuffle index 0). +; Elements in the high 128-bit lane of the result vector are equal to +; the lower byte in the high 128-bit lane of %InVec (shuffle index 16). + +define <32 x i8> @splat_test_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @splat_test_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> zeroinitializer) + ret <32 x i8> %1 +} + +define <64 x i8> @splat_test_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @splat_test_avx512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> poison, <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> zeroinitializer) + ret <64 x i8> %1 +} + +; Each of the byte shuffles in the following tests is equivalent to a blend between +; vector %InVec and a vector of all zeroes. 
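+; Lanes whose mask byte has the sign bit set read zero and the remaining
+; lanes read %InVec in place, so each call folds to a two-source
+; shufflevector against a zero vector. A sketch where only lane 0 is zeroed
+; (the mask values here are assumptions; the concrete constants live in the
+; tests):
+;   %r = shufflevector <16 x i8> %InVec, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>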
+ +define <16 x i8> @blend1(<16 x i8> %InVec) { +; CHECK-LABEL: @blend1( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> , <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <16 x i8> @blend2(<16 x i8> %InVec) { +; CHECK-LABEL: @blend2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> , <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <16 x i8> @blend3(<16 x i8> %InVec) { +; CHECK-LABEL: @blend3( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> , <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <16 x i8> @blend4(<16 x i8> %InVec) { +; CHECK-LABEL: @blend4( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> , <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <16 x i8> @blend5(<16 x i8> %InVec) { +; CHECK-LABEL: @blend5( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> , <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <16 x i8> @blend6(<16 x i8> %InVec) { +; CHECK-LABEL: @blend6( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> , <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <32 x i8> @blend1_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @blend1_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> , <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <32 x i8> @blend2_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @blend2_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> , <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <32 x i8> @blend3_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @blend3_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> , <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <32 x i8> @blend4_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @blend4_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> , <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <32 x i8> @blend5_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @blend5_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> , <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <32 x i8> @blend6_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @blend6_avx2( +; 
CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> , <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <64 x i8> @blend1_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @blend1_avx512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> , <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +define <64 x i8> @blend2_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @blend2_avx512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> , <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +define <64 x i8> @blend3_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @blend3_avx512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> , <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +define <64 x i8> @blend4_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @blend4_avx512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> , <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +define <64 x i8> @blend5_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @blend5_avx512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> , <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +define <64 x i8> @blend6_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @blend6_avx512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> , <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +; movq idiom. +define <16 x i8> @movq_idiom(<16 x i8> %InVec) { +; CHECK-LABEL: @movq_idiom( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> , <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <32 x i8> @movq_idiom_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @movq_idiom_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> , <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <64 x i8> @movq_idiom_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @movq_idiom_avx512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> , <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +; Vector permutations using byte shuffles. 
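+; With every sign bit clear, each mask byte selects source byte (value & 15)
+; from its own 128-bit lane, so a constant mask becomes a single-source
+; shufflevector. For example, a full byte reversal (an assumed mask, shown
+; only for illustration) would fold to:
+;   %r = shufflevector <16 x i8> %InVec, <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>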
+ +define <16 x i8> @permute1(<16 x i8> %InVec) { +; CHECK-LABEL: @permute1( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <16 x i8> @permute2(<16 x i8> %InVec) { +; CHECK-LABEL: @permute2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <32 x i8> @permute1_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @permute1_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <32 x i8> @permute2_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @permute2_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <64 x i8> @permute1_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @permute1_avx512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> poison, <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +define <64 x i8> @permute2_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @permute2_avx512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> poison, <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +; Test that instcombine correctly folds a pshufb with values that +; are not -128 and that are not encoded in four bits. 
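+; That is, only bit 7 (zero the lane) and bits 0-3 (lane index) of each mask
+; byte are observable; bits 4-6 are ignored. So, for example, i8 16 selects
+; lane 0 exactly as i8 0 does, and any negative byte zeroes its lane exactly
+; as -128 does, which is why the tests below still fold to an identity or to
+; zero.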
+ +define <16 x i8> @identity_test2_2(<16 x i8> %InVec) { +; CHECK-LABEL: @identity_test2_2( +; CHECK-NEXT: ret <16 x i8> [[INVEC:%.*]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <32 x i8> @identity_test_avx2_2(<32 x i8> %InVec) { +; CHECK-LABEL: @identity_test_avx2_2( +; CHECK-NEXT: ret <32 x i8> [[INVEC:%.*]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <64 x i8> @identity_test_avx512_2(<64 x i8> %InVec) { +; CHECK-LABEL: @identity_test_avx512_2( +; CHECK-NEXT: ret <64 x i8> [[INVEC:%.*]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +define <16 x i8> @fold_to_zero_vector_2(<16 x i8> %InVec) { +; CHECK-LABEL: @fold_to_zero_vector_2( +; CHECK-NEXT: ret <16 x i8> zeroinitializer +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <32 x i8> @fold_to_zero_vector_avx2_2(<32 x i8> %InVec) { +; CHECK-LABEL: @fold_to_zero_vector_avx2_2( +; CHECK-NEXT: ret <32 x i8> zeroinitializer +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <64 x i8> @fold_to_zero_vector_avx512_2(<64 x i8> %InVec) { +; CHECK-LABEL: @fold_to_zero_vector_avx512_2( +; CHECK-NEXT: ret <64 x i8> zeroinitializer +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +define <16 x i8> @permute3(<16 x i8> %InVec) { +; CHECK-LABEL: @permute3( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <32 x i8> @permute3_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @permute3_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <64 x i8> @permute3_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @permute3_avx512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> poison, <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +; FIXME: Verify that instcombine is able to fold constant byte shuffles with undef mask elements. 
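+; A fully simplified form could propagate undef mask bytes into undef shuffle
+; indices instead of materializing a constant second operand; a hypothetical
+; output (not what instcombine currently produces):
+;   %r = shufflevector <16 x i8> %InVec, <16 x i8> poison, <16 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>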
+ +define <16 x i8> @fold_with_undef_elts(<16 x i8> %InVec) { +; CHECK-LABEL: @fold_with_undef_elts( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> , <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) + ret <16 x i8> %1 +} + +define <32 x i8> @fold_with_undef_elts_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @fold_with_undef_elts_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> , <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) + ret <32 x i8> %1 +} + +define <64 x i8> @fold_with_undef_elts_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @fold_with_undef_elts_avx512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> , <64 x i32> +; CHECK-NEXT: ret <64 x i8> [[TMP1]] +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) + ret <64 x i8> %1 +} + +define <16 x i8> @fold_with_allundef_elts(<16 x i8> %InVec) { +; CHECK-LABEL: @fold_with_allundef_elts( +; CHECK-NEXT: ret <16 x i8> undef +; + %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> undef) + ret <16 x i8> %1 +} + +define <32 x i8> @fold_with_allundef_elts_avx2(<32 x i8> %InVec) { +; CHECK-LABEL: @fold_with_allundef_elts_avx2( +; CHECK-NEXT: ret <32 x i8> undef +; + %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> undef) + ret <32 x i8> %1 +} + +define <64 x i8> @fold_with_allundef_elts_avx512(<64 x i8> %InVec) { +; CHECK-LABEL: @fold_with_allundef_elts_avx512( +; CHECK-NEXT: ret <64 x i8> undef +; + %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> undef) + ret <64 x i8> %1 +} + +; Demanded elts tests. 
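+; Each test below discards some result lanes with a trailing shufflevector,
+; so insertelement writes into mask lanes that only feed the dropped results
+; are dead and can be removed. A minimal sketch (names are illustrative):
+;   %m = insertelement <16 x i8> %BaseMask, i8 %M15, i32 15 ; dead if result lane 15 is never used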
+ +define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) { +; CHECK-LABEL: @demanded_elts_insertion( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[BASEMASK:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP2]] +; + %1 = insertelement <16 x i8> %BaseMask, i8 %M0, i32 0 + %2 = insertelement <16 x i8> %1, i8 %M15, i32 15 + %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %2) + %4 = shufflevector <16 x i8> %3, <16 x i8> poison, <16 x i32> + ret <16 x i8> %4 +} + +define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) { +; CHECK-LABEL: @demanded_elts_insertion_avx2( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[BASEMASK:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP2]] +; + %1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0 + %2 = insertelement <32 x i8> %1, i8 %M22, i32 22 + %3 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %2) + %4 = shufflevector <32 x i8> %3, <32 x i8> poison, <32 x i32> + ret <32 x i8> %4 +} + +define <64 x i8> @demanded_elts_insertion_avx512(<64 x i8> %InVec, <64 x i8> %BaseMask, i8 %M0, i8 %M30) { +; CHECK-LABEL: @demanded_elts_insertion_avx512( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <64 x i8> poison, i8 [[M0:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> [[INVEC:%.*]], <64 x i8> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <64 x i8> [[TMP2]], <64 x i8> poison, <64 x i32> zeroinitializer +; CHECK-NEXT: ret <64 x i8> [[TMP3]] +; + %1 = insertelement <64 x i8> %BaseMask, i8 %M0, i32 0 + %2 = insertelement <64 x i8> %1, i8 %M30, i32 30 + %3 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> %2) + %4 = shufflevector <64 x i8> %3, <64 x i8> poison, <64 x i32> zeroinitializer + ret <64 x i8> %4 +} + +declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) +declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) +declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>) diff --git a/llvm/test/Transforms/InstCombine/X86/x86-sse4a-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-sse4a-inseltpoison.ll new file mode 100644 index 0000000..3270ca13 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/X86/x86-sse4a-inseltpoison.ll @@ -0,0 +1,420 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s + +; +; EXTRQ +; + +define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) { +; CHECK-LABEL: @test_extrq_call( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) [[ATTR1:#.*]] +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind + ret <2 x i64> %1 +} + +define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) { +; CHECK-LABEL: @test_extrq_zero_arg0( +; CHECK-NEXT: ret <2 x i64> +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind + ret <2 x i64> %1 +} + +define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, 
<16 x i8> %y) { +; CHECK-LABEL: @test_extrq_zero_arg1( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind + ret <2 x i64> %1 +} + +define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) { +; CHECK-LABEL: @test_extrq_to_extqi( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 8, i8 15) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> ) nounwind + ret <2 x i64> %1 +} + +define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) { +; CHECK-LABEL: @test_extrq_constant( +; CHECK-NEXT: ret <2 x i64> +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> , <16 x i8> ) nounwind + ret <2 x i64> %1 +} + +define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) { +; CHECK-LABEL: @test_extrq_constant_undef( +; CHECK-NEXT: ret <2 x i64> +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> , <16 x i8> ) nounwind + ret <2 x i64> %1 +} + +define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) { +; CHECK-LABEL: @test_extrq_call_constexpr( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %1 = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> to <16 x i8>)) + ret <2 x i64> %1 +} + +; +; EXTRQI +; + +define <2 x i64> @test_extrqi_call(<2 x i64> %x) { +; CHECK-LABEL: @test_extrqi_call( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 8, i8 23) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23) + ret <2 x i64> %1 +} + +define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) { +; CHECK-LABEL: @test_extrqi_shuffle_1zuu( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> , <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32) + ret <2 x i64> %1 +} + +define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) { +; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> , <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16) + ret <2 x i64> %1 +} + +define <2 x i64> @test_extrqi_undef(<2 x i64> %x) { +; CHECK-LABEL: @test_extrqi_undef( +; CHECK-NEXT: ret <2 x i64> undef +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33) + ret <2 x i64> %1 +} + +define <2 x i64> @test_extrqi_zero(<2 x i64> %x) { +; CHECK-LABEL: @test_extrqi_zero( +; CHECK-NEXT: ret <2 x i64> +; + %1 = tail call <2 x i64> 
@llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18) + ret <2 x i64> %1 +} + +define <2 x i64> @test_extrqi_constant(<2 x i64> %x) { +; CHECK-LABEL: @test_extrqi_constant( +; CHECK-NEXT: ret <2 x i64> +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> , i8 3, i8 18) + ret <2 x i64> %1 +} + +define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) { +; CHECK-LABEL: @test_extrqi_constant_undef( +; CHECK-NEXT: ret <2 x i64> +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> , i8 4, i8 18) + ret <2 x i64> %1 +} + +define <2 x i64> @test_extrqi_call_constexpr() { +; CHECK-LABEL: @test_extrqi_call_constexpr( +; CHECK-NEXT: ret <2 x i64> zeroinitializer +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 8, i8 16) + ret <2 x i64> %1 +} + +; +; INSERTQ +; + +define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: @test_insertq_call( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]]) [[ATTR1]] +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind + ret <2 x i64> %1 +} + +define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: @test_insertq_to_insertqi( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> , i8 18, i8 2) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> ) nounwind + ret <2 x i64> %1 +} + +define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: @test_insertq_constant( +; CHECK-NEXT: ret <2 x i64> +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> , <2 x i64> ) nounwind + ret <2 x i64> %1 +} + +define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: @test_insertq_constant_undef( +; CHECK-NEXT: ret <2 x i64> +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> , <2 x i64> ) nounwind + ret <2 x i64> %1 +} + +define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) { +; CHECK-LABEL: @test_insertq_call_constexpr( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> , i8 2, i8 0) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>) to <2 x i64>)) + ret <2 x i64> %1 +} + +; +; INSERTQI +; + +define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) { +; CHECK-LABEL: @test_insertqi_shuffle_04uu( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[V:%.*]], <16 x i8> [[I:%.*]], <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = bitcast <16 x i8> %v to <2 x i64> + %2 = bitcast <16 x i8> %i to <2 x i64> + %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32) + %4 = bitcast <2 x i64> %3 to <16 x i8> + ret <16 x i8> %4 +} + +define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) { +; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[I:%.*]], <16 x i8> [[V:%.*]], <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; + %1 = bitcast <16 x i8> %v to <2 x i64> + %2 = bitcast <16 x i8> %i to <2 x i64> + %3 = tail call <2 x i64> 
@llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0) + %4 = bitcast <2 x i64> %3 to <16 x i8> + ret <16 x i8> %4 +} + +define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) { +; CHECK-LABEL: @test_insertqi_constant( +; CHECK-NEXT: ret <2 x i64> +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> , <2 x i64> , i8 16, i8 1) + ret <2 x i64> %1 +} + +define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) { +; CHECK-LABEL: @test_insertqi_call_constexpr( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> , i8 48, i8 3) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3) + ret <2 x i64> %1 +} + +; The result of this insert is the second arg, since the top 64 bits of +; the result are undefined, and we copy the bottom 64 bits from the +; second arg +define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) { +; CHECK-LABEL: @testInsert64Bits( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0) + ret <2 x i64> %1 +} + +define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) { +; CHECK-LABEL: @testZeroLength( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0) + ret <2 x i64> %1 +} + +define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) { +; CHECK-LABEL: @testUndefinedInsertq_1( +; CHECK-NEXT: ret <2 x i64> undef +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16) + ret <2 x i64> %1 +} + +define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) { +; CHECK-LABEL: @testUndefinedInsertq_2( +; CHECK-NEXT: ret <2 x i64> undef +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32) + ret <2 x i64> %1 +} + +define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) { +; CHECK-LABEL: @testUndefinedInsertq_3( +; CHECK-NEXT: ret <2 x i64> undef +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16) + ret <2 x i64> %1 +} + +; +; Vector Demanded Bits +; + +define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) { +; CHECK-LABEL: @test_extrq_arg0( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) [[ATTR1]] +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = shufflevector <2 x i64> %x, <2 x i64> poison, <2 x i32> + %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind + ret <2 x i64> %2 +} + +define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) { +; CHECK-LABEL: @test_extrq_arg1( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) [[ATTR1]] +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + 
%1 = shufflevector <16 x i8> %y, <16 x i8> poison, <16 x i32> + %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind + ret <2 x i64> %2 +} + +define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) { +; CHECK-LABEL: @test_extrq_args01( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) [[ATTR1]] +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = shufflevector <2 x i64> %x, <2 x i64> poison, <2 x i32> + %2 = shufflevector <16 x i8> %y, <16 x i8> poison, <16 x i32> + %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind + ret <2 x i64> %3 +} + +define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) { +; CHECK-LABEL: @test_extrq_ret( +; CHECK-NEXT: ret <2 x i64> undef +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind + %2 = shufflevector <2 x i64> %1, <2 x i64> poison, <2 x i32> + ret <2 x i64> %2 +} + +define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) { +; CHECK-LABEL: @test_extrqi_arg0( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 3, i8 2) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = shufflevector <2 x i64> %x, <2 x i64> poison, <2 x i32> + %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2) + ret <2 x i64> %2 +} + +define <2 x i64> @test_extrqi_ret(<2 x i64> %x) { +; CHECK-LABEL: @test_extrqi_ret( +; CHECK-NEXT: ret <2 x i64> undef +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind + %2 = shufflevector <2 x i64> %1, <2 x i64> poison, <2 x i32> + ret <2 x i64> %2 +} + +define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: @test_insertq_arg0( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]]) [[ATTR1]] +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = shufflevector <2 x i64> %x, <2 x i64> poison, <2 x i32> + %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind + ret <2 x i64> %2 +} + +define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: @test_insertq_ret( +; CHECK-NEXT: ret <2 x i64> undef +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind + %2 = shufflevector <2 x i64> %1, <2 x i64> poison, <2 x i32> + ret <2 x i64> %2 +} + +define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: @test_insertqi_arg0( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) [[ATTR1]] +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = shufflevector <2 x i64> %x, <2 x i64> poison, <2 x i32> + %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind + ret <2 x i64> %2 +} + +define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: @test_insertqi_arg1( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) [[ATTR1]] +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = shufflevector <2 x i64> %y, <2 x i64> poison, <2 x i32> + %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind + ret <2 x i64> %2 +} + +define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: @test_insertqi_args01( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> 
@llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) [[ATTR1]] +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %1 = shufflevector <2 x i64> %x, <2 x i64> poison, <2 x i32> + %2 = shufflevector <2 x i64> %y, <2 x i64> poison, <2 x i32> + %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind + ret <2 x i64> %3 +} + +define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: @test_insertqi_ret( +; CHECK-NEXT: ret <2 x i64> undef +; + %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind + %2 = shufflevector <2 x i64> %1, <2 x i64> poison, <2 x i32> + ret <2 x i64> %2 +} + +; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrq +declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind + +; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrqi +declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind + +; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertq +declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind + +; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi +declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll index e23e222..38a3a6f 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll @@ -2923,12 +2923,12 @@ define <4 x i32> @avx2_psrav_d_128_masked(<4 x i32> %v, <4 x i32> %a) { define <4 x i32> @avx2_psrav_d_128_masked_shuffle(<4 x i32> %v, <4 x i32> %a) { ; CHECK-LABEL: @avx2_psrav_d_128_masked_shuffle( ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; %1 = and <4 x i32> %a, - %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <4 x i32> %3 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %2) ret <4 x i32> %3 } @@ -3030,7 +3030,7 @@ define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; - %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + %1 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1) ret <8 x i16> %2 } @@ -3041,7 +3041,7 @@ define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) { ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) ; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; - %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> %2 = bitcast <2 x i64> %1 to <8 x i16> %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2) ret <8 x i16> %3 @@ -3052,7 +3052,7 @@ define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <4 x i32> 
[[TMP1]] ; - %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + %1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } @@ -3063,7 +3063,7 @@ define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) { ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; - %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + %1 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> %2 = bitcast <8 x i16> %1 to <4 x i32> %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2) ret <4 x i32> %3 @@ -3074,7 +3074,7 @@ define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; - %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + %1 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1) ret <16 x i16> %2 } @@ -3084,7 +3084,7 @@ define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; - %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + %1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1) ret <8 x i32> %2 } @@ -3094,7 +3094,7 @@ define <2 x i64> @avx512_psra_q_128_var(<2 x i64> %v, <2 x i64> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; - %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } @@ -3104,7 +3104,7 @@ define <4 x i64> @avx512_psra_q_256_var(<4 x i64> %v, <2 x i64> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; - %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> %2 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> %1) ret <4 x i64> %2 } @@ -3114,7 +3114,7 @@ define <32 x i16> @avx512_psra_w_512_var(<32 x i16> %v, <8 x i16> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; - %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + %1 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> %1) ret <32 x i16> %2 } @@ -3124,7 +3124,7 @@ define <16 x i32> @avx512_psra_d_512_var(<16 x i32> %v, <4 x i32> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; - %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + %1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> %1) ret <16 x 
i32> %2 } @@ -3134,7 +3134,7 @@ define <8 x i64> @avx512_psra_q_512_var(<8 x i64> %v, <2 x i64> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; - %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1) ret <8 x i64> %2 } @@ -3144,7 +3144,7 @@ define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; - %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + %1 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1) ret <8 x i16> %2 } @@ -3154,7 +3154,7 @@ define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; - %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + %1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } @@ -3164,7 +3164,7 @@ define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; - %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } @@ -3174,7 +3174,7 @@ define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; - %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + %1 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1) ret <16 x i16> %2 } @@ -3185,7 +3185,7 @@ define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) { ; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) ; CHECK-NEXT: ret <16 x i16> [[TMP2]] ; - %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + %1 = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> %2 = bitcast <16 x i8> %1 to <8 x i16> %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2) ret <16 x i16> %3 @@ -3196,7 +3196,7 @@ define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; - %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + %1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1) ret <8 x i32> %2 } @@ -3207,7 +3207,7 @@ define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) { ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; - %1 = shufflevector <2 x i64> 
%a, <2 x i64> undef, <2 x i32> + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> %2 = bitcast <2 x i64> %1 to <4 x i32> %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2) ret <8 x i32> %3 @@ -3218,7 +3218,7 @@ define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; - %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1) ret <4 x i64> %2 } @@ -3228,7 +3228,7 @@ define <32 x i16> @avx512_psrl_w_512_var(<32 x i16> %v, <8 x i16> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; - %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + %1 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1) ret <32 x i16> %2 } @@ -3239,7 +3239,7 @@ define <32 x i16> @avx512_psrl_w_512_var_bc(<32 x i16> %v, <16 x i8> %a) { ; CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) ; CHECK-NEXT: ret <32 x i16> [[TMP2]] ; - %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + %1 = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> %2 = bitcast <16 x i8> %1 to <8 x i16> %3 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %2) ret <32 x i16> %3 @@ -3250,7 +3250,7 @@ define <16 x i32> @avx512_psrl_d_512_var(<16 x i32> %v, <4 x i32> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; - %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + %1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %1) ret <16 x i32> %2 } @@ -3261,7 +3261,7 @@ define <16 x i32> @avx512_psrl_d_512_var_bc(<16 x i32> %v, <2 x i64> %a) { ; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) ; CHECK-NEXT: ret <16 x i32> [[TMP2]] ; - %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> %2 = bitcast <2 x i64> %1 to <4 x i32> %3 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %2) ret <16 x i32> %3 @@ -3272,7 +3272,7 @@ define <8 x i64> @avx512_psrl_q_512_var(<8 x i64> %v, <2 x i64> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; - %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> %2 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> %1) ret <8 x i64> %2 } @@ -3282,7 +3282,7 @@ define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; - %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + %1 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> %2 = tail call <8 x 
i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1) ret <8 x i16> %2 } @@ -3292,7 +3292,7 @@ define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; - %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + %1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1) ret <4 x i32> %2 } @@ -3302,7 +3302,7 @@ define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; - %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } @@ -3312,7 +3312,7 @@ define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; - %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + %1 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1) ret <16 x i16> %2 } @@ -3322,7 +3322,7 @@ define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; - %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + %1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1) ret <8 x i32> %2 } @@ -3332,7 +3332,7 @@ define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; - %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1) ret <4 x i64> %2 } @@ -3342,7 +3342,7 @@ define <32 x i16> @avx512_psll_w_512_var(<32 x i16> %v, <8 x i16> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; - %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> + %1 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> %2 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> %1) ret <32 x i16> %2 } @@ -3352,7 +3352,7 @@ define <16 x i32> @avx512_psll_d_512_var(<16 x i32> %v, <4 x i32> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; - %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> + %1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1) ret <16 x i32> %2 } @@ -3362,7 +3362,7 @@ define <8 x i64> @avx512_psll_q_512_var(<8 x i64> %v, <2 x i64> %a) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) ; 
CHECK-NEXT: ret <8 x i64> [[TMP1]] ; - %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> + %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> %2 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> %1) ret <8 x i64> %2 } diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll new file mode 100644 index 0000000..f633a3d --- /dev/null +++ b/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll @@ -0,0 +1,301 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Verify that instcombine is able to fold identity shuffles. + +define <4 x float> @identity_test_vpermilvar_ps(<4 x float> %v) { +; CHECK-LABEL: @identity_test_vpermilvar_ps( +; CHECK-NEXT: ret <4 x float> [[V:%.*]] +; + %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> ) + ret <4 x float> %a +} + +define <8 x float> @identity_test_vpermilvar_ps_256(<8 x float> %v) { +; CHECK-LABEL: @identity_test_vpermilvar_ps_256( +; CHECK-NEXT: ret <8 x float> [[V:%.*]] +; + %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> ) + ret <8 x float> %a +} + +define <16 x float> @identity_test_vpermilvar_ps_512(<16 x float> %v) { +; CHECK-LABEL: @identity_test_vpermilvar_ps_512( +; CHECK-NEXT: ret <16 x float> [[V:%.*]] +; + %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> ) + ret <16 x float> %a +} + +define <2 x double> @identity_test_vpermilvar_pd(<2 x double> %v) { +; CHECK-LABEL: @identity_test_vpermilvar_pd( +; CHECK-NEXT: ret <2 x double> [[V:%.*]] +; + %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> ) + ret <2 x double> %a +} + +define <4 x double> @identity_test_vpermilvar_pd_256(<4 x double> %v) { +; CHECK-LABEL: @identity_test_vpermilvar_pd_256( +; CHECK-NEXT: ret <4 x double> [[V:%.*]] +; + %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> ) + ret <4 x double> %a +} + +define <8 x double> @identity_test_vpermilvar_pd_512(<8 x double> %v) { +; CHECK-LABEL: @identity_test_vpermilvar_pd_512( +; CHECK-NEXT: ret <8 x double> [[V:%.*]] +; + %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> ) + ret <8 x double> %a +} + +; Instcombine should be able to fold the following byte shuffle to a builtin shufflevector +; with a shuffle mask of all zeroes. 
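+; (A zeroinitializer control makes every element select index 0 within its own
+; 128-bit lane, so the expected fold is a per-lane splat of the lane's first
+; element.)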
+ +define <4 x float> @zero_test_vpermilvar_ps_zero(<4 x float> %v) { +; CHECK-LABEL: @zero_test_vpermilvar_ps_zero( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V:%.*]], <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> zeroinitializer) + ret <4 x float> %a +} + +define <8 x float> @zero_test_vpermilvar_ps_256_zero(<8 x float> %v) { +; CHECK-LABEL: @zero_test_vpermilvar_ps_256_zero( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[V:%.*]], <8 x float> undef, <8 x i32> +; CHECK-NEXT: ret <8 x float> [[TMP1]] +; + %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> zeroinitializer) + ret <8 x float> %a +} + +define <16 x float> @zero_test_vpermilvar_ps_512_zero(<16 x float> %v) { +; CHECK-LABEL: @zero_test_vpermilvar_ps_512_zero( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V:%.*]], <16 x float> undef, <16 x i32> +; CHECK-NEXT: ret <16 x float> [[TMP1]] +; + %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> zeroinitializer) + ret <16 x float> %a +} + +define <2 x double> @zero_test_vpermilvar_pd_zero(<2 x double> %v) { +; CHECK-LABEL: @zero_test_vpermilvar_pd_zero( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[V:%.*]], <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> zeroinitializer) + ret <2 x double> %a +} + +define <4 x double> @zero_test_vpermilvar_pd_256_zero(<4 x double> %v) { +; CHECK-LABEL: @zero_test_vpermilvar_pd_256_zero( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V:%.*]], <4 x double> undef, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP1]] +; + %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> zeroinitializer) + ret <4 x double> %a +} + +define <8 x double> @zero_test_vpermilvar_pd_512_zero(<8 x double> %v) { +; CHECK-LABEL: @zero_test_vpermilvar_pd_512_zero( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[V:%.*]], <8 x double> undef, <8 x i32> +; CHECK-NEXT: ret <8 x double> [[TMP1]] +; + %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> zeroinitializer) + ret <8 x double> %a +} + +; Verify that instcombine is able to fold constant shuffles. 
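+; (Hypothetical illustration, not one of the constants elided above: on the
+; 128-bit ps variant, a control of <i32 3, i32 2, i32 1, i32 0> would fold to
+;   %a = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; because the low bits of each control element become a shuffle mask index.)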
+ +define <4 x float> @test_vpermilvar_ps(<4 x float> %v) { +; CHECK-LABEL: @test_vpermilvar_ps( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> ) + ret <4 x float> %a +} + +define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) { +; CHECK-LABEL: @test_vpermilvar_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[V:%.*]], <8 x float> undef, <8 x i32> +; CHECK-NEXT: ret <8 x float> [[TMP1]] +; + %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> ) + ret <8 x float> %a +} + +define <16 x float> @test_vpermilvar_ps_512(<16 x float> %v) { +; CHECK-LABEL: @test_vpermilvar_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V:%.*]], <16 x float> undef, <16 x i32> +; CHECK-NEXT: ret <16 x float> [[TMP1]] +; + %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> ) + ret <16 x float> %a +} + +define <2 x double> @test_vpermilvar_pd(<2 x double> %v) { +; CHECK-LABEL: @test_vpermilvar_pd( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[V:%.*]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> ) + ret <2 x double> %a +} + +define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) { +; CHECK-LABEL: @test_vpermilvar_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V:%.*]], <4 x double> undef, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP1]] +; + %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> ) + ret <4 x double> %a +} + +define <8 x double> @test_vpermilvar_pd_512(<8 x double> %v) { +; CHECK-LABEL: @test_vpermilvar_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[V:%.*]], <8 x double> undef, <8 x i32> +; CHECK-NEXT: ret <8 x double> [[TMP1]] +; + %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> ) + ret <8 x double> %a +} + +; Verify that instcombine is able to fold constant shuffles with undef mask elements. 
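+; (An undef control element should carry through as an undef index in the
+; folded shufflevector mask. Hypothetical illustration, assuming a control of
+; <i32 undef, i32 2, i32 1, i32 0>:
+;   %a = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>)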
+ +define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) { +; CHECK-LABEL: @undef_test_vpermilvar_ps( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> ) + ret <4 x float> %a +} + +define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) { +; CHECK-LABEL: @undef_test_vpermilvar_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[V:%.*]], <8 x float> undef, <8 x i32> +; CHECK-NEXT: ret <8 x float> [[TMP1]] +; + %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> ) + ret <8 x float> %a +} + +define <16 x float> @undef_test_vpermilvar_ps_512(<16 x float> %v) { +; CHECK-LABEL: @undef_test_vpermilvar_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V:%.*]], <16 x float> undef, <16 x i32> +; CHECK-NEXT: ret <16 x float> [[TMP1]] +; + %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> ) + ret <16 x float> %a +} + +define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) { +; CHECK-LABEL: @undef_test_vpermilvar_pd( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[V:%.*]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> ) + ret <2 x double> %a +} + +define <4 x double> @undef_test_vpermilvar_pd_256(<4 x double> %v) { +; CHECK-LABEL: @undef_test_vpermilvar_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V:%.*]], <4 x double> undef, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP1]] +; + %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> ) + ret <4 x double> %a +} + +define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) { +; CHECK-LABEL: @undef_test_vpermilvar_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[V:%.*]], <8 x double> undef, <8 x i32> +; CHECK-NEXT: ret <8 x double> [[TMP1]] +; + %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> ) + ret <8 x double> %a +} + +; Simplify demanded elts + +define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) { +; CHECK-LABEL: @elts_test_vpermilvar_ps( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = insertelement <4 x i32> , i32 %a1, i32 3 + %2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %1) + %3 = shufflevector <4 x float> %2, <4 x float> poison, <4 x i32> + ret <4 x float> %3 +} + +define <8 x float> @elts_test_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) { +; CHECK-LABEL: @elts_test_vpermilvar_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> +; CHECK-NEXT: ret <8 x float> [[TMP1]] +; + %1 = shufflevector <8 x i32> %a1, <8 x i32> , <8 x i32> + %2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %1) + %3 = shufflevector <8 x float> %2, <8 x float> poison, <8 x i32> + ret <8 x float> %3 +} + +define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a1, i32 %a2) { +; CHECK-LABEL: @elts_test_vpermilvar_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[A0:%.*]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: 
[[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: ret <16 x float> [[TMP2]] +; + %1 = insertelement <16 x i32> %a1, i32 %a2, i32 0 + %2 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %1) + %3 = shufflevector <16 x float> %2, <16 x float> poison, <16 x i32> + ret <16 x float> %3 +} + +define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) { +; CHECK-LABEL: @elts_test_vpermilvar_pd( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = insertelement <2 x i64> , i64 %a1, i32 1 + %2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %1) + %3 = shufflevector <2 x double> %2, <2 x double> poison, <2 x i32> + ret <2 x double> %3 +} + +define <4 x double> @elts_test_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) { +; CHECK-LABEL: @elts_test_vpermilvar_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[TMP1]] +; + %1 = shufflevector <4 x i64> , <4 x i64> %a1, <4 x i32> + %2 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %1) + %3 = shufflevector <4 x double> %2, <4 x double> poison, <4 x i32> + ret <4 x double> %3 +} + +define <8 x double> @elts_test_vpermilvar_pd_512(<8 x double> %a0, <8 x i64> %a1, i64 %a2) { +; CHECK-LABEL: @elts_test_vpermilvar_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i64> poison, i64 [[A2:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[A0:%.*]], <8 x i64> [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: ret <8 x double> [[TMP3]] +; + %1 = insertelement <8 x i64> %a1, i64 %a2, i32 0 + %2 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %1) + %3 = shufflevector <8 x double> %2, <8 x double> poison, <8 x i32> zeroinitializer + ret <8 x double> %3 +} + +declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) +declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) +declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>) + +declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) +declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) +declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>) diff --git a/llvm/test/Transforms/InstCombine/assume-inseltpoison.ll b/llvm/test/Transforms/InstCombine/assume-inseltpoison.ll new file mode 100644 index 0000000..8c04c4a --- /dev/null +++ b/llvm/test/Transforms/InstCombine/assume-inseltpoison.ll @@ -0,0 +1,656 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S -instcombine-infinite-loop-threshold=2 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @llvm.assume(i1) #1 + +; Check that the alignment has been upgraded and that the assume has not +; been removed: + +define i32 @foo1(i32* %a) #0 { +; CHECK-LABEL: @foo1( +; CHECK-NEXT: [[T0:%.*]] = load i32, i32* [[A:%.*]], align 32 +; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 +; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 +; CHECK-NEXT: 
[[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: ret i32 [[T0]] +; + %t0 = load i32, i32* %a, align 4 + %ptrint = ptrtoint i32* %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + ret i32 %t0 +} + +; Same check as in @foo1, but make sure it works if the assume is first too. + +define i32 @foo2(i32* %a) #0 { +; CHECK-LABEL: @foo2( +; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A:%.*]] to i64 +; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 +; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: [[T0:%.*]] = load i32, i32* [[A]], align 32 +; CHECK-NEXT: ret i32 [[T0]] +; + %ptrint = ptrtoint i32* %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + %t0 = load i32, i32* %a, align 4 + ret i32 %t0 +} + +define i32 @simple(i32 %a) #1 { +; CHECK-LABEL: @simple( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 4 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 4 +; + %cmp = icmp eq i32 %a, 4 + tail call void @llvm.assume(i1 %cmp) + ret i32 %a +} + +define i32 @can1(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @can1( +; CHECK-NEXT: call void @llvm.assume(i1 [[A:%.*]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[B:%.*]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[C:%.*]]) +; CHECK-NEXT: ret i32 5 +; + %and1 = and i1 %a, %b + %and = and i1 %and1, %c + tail call void @llvm.assume(i1 %and) + ret i32 5 +} + +define i32 @can2(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @can2( +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[A:%.*]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[B:%.*]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP2]]) +; CHECK-NEXT: ret i32 5 +; + %v = or i1 %a, %b + %w = xor i1 %v, 1 + tail call void @llvm.assume(i1 %w) + ret i32 5 +} + +define i32 @bar1(i32 %a) #0 { +; CHECK-LABEL: @bar1( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 7 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 1 +; + %and1 = and i32 %a, 3 + %and = and i32 %a, 7 + %cmp = icmp eq i32 %and, 1 + tail call void @llvm.assume(i1 %cmp) + ret i32 %and1 +} + +define i32 @bar2(i32 %a) #0 { +; CHECK-LABEL: @bar2( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 7 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 1 +; + %and = and i32 %a, 7 + %cmp = icmp eq i32 %and, 1 + tail call void @llvm.assume(i1 %cmp) + %and1 = and i32 %a, 3 + ret i32 %and1 +} + +define i32 @bar3(i32 %a, i1 %x, i1 %y) #0 { +; CHECK-LABEL: @bar3( +; CHECK-NEXT: entry: +; CHECK-NEXT: tail call void @llvm.assume(i1 [[X:%.*]]) +; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 7 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[Y:%.*]]) +; CHECK-NEXT: ret i32 1 +; +entry: + %and1 = and i32 %a, 3 + +; Don't be fooled by other assumes around. 
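+; (Only the assume on %cmp constrains %a; the assumes on %x and %y are
+; unrelated and must not block folding %and1 to 1.)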
+ + tail call void @llvm.assume(i1 %x) + + %and = and i32 %a, 7 + %cmp = icmp eq i32 %and, 1 + tail call void @llvm.assume(i1 %cmp) + + tail call void @llvm.assume(i1 %y) + + ret i32 %and1 +} + +define i32 @bar4(i32 %a, i32 %b) { +; CHECK-LABEL: @bar4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], 7 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[A]], [[B:%.*]] +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP2]]) +; CHECK-NEXT: ret i32 1 +; +entry: + %and1 = and i32 %b, 3 + %and = and i32 %a, 7 + %cmp = icmp eq i32 %and, 1 + tail call void @llvm.assume(i1 %cmp) + %cmp2 = icmp eq i32 %a, %b + tail call void @llvm.assume(i1 %cmp2) + ret i32 %and1 +} + +define i32 @icmp1(i32 %a) #0 { +; CHECK-LABEL: @icmp1( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 1 +; + %cmp = icmp sgt i32 %a, 5 + tail call void @llvm.assume(i1 %cmp) + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @icmp2(i32 %a) #0 { +; CHECK-LABEL: @icmp2( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i32 0 +; + %cmp = icmp sgt i32 %a, 5 + tail call void @llvm.assume(i1 %cmp) + %t0 = zext i1 %cmp to i32 + %lnot.ext = xor i32 %t0, 1 + ret i32 %lnot.ext +} + +; If the 'not' of a condition is known true, then the condition must be false. + +define i1 @assume_not(i1 %cond) { +; CHECK-LABEL: @assume_not( +; CHECK-NEXT: [[NOTCOND:%.*]] = xor i1 [[COND:%.*]], true +; CHECK-NEXT: call void @llvm.assume(i1 [[NOTCOND]]) +; CHECK-NEXT: ret i1 false +; + %notcond = xor i1 %cond, true + call void @llvm.assume(i1 %notcond) + ret i1 %cond +} + +declare void @escape(i32* %a) + +; Canonicalize a nonnull assumption on a load into metadata form. + +define i32 @bundle1(i32* %P) { +; CHECK-LABEL: @bundle1( +; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "nonnull"(i32* [[P:%.*]]) ] +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[P]], align 4 +; CHECK-NEXT: ret i32 [[LOAD]] +; + tail call void @llvm.assume(i1 true) ["nonnull"(i32* %P)] + %load = load i32, i32* %P + ret i32 %load +} + +define i32 @bundle2(i32* %P) { +; CHECK-LABEL: @bundle2( +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 [[LOAD]] +; + tail call void @llvm.assume(i1 true) ["ignore"(i32* undef)] + %load = load i32, i32* %P + ret i32 %load +} + +define i1 @nonnull1(i32** %a) { +; CHECK-LABEL: @nonnull1( +; CHECK-NEXT: [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8, !nonnull !6 +; CHECK-NEXT: tail call void @escape(i32* nonnull [[LOAD]]) +; CHECK-NEXT: ret i1 false +; + %load = load i32*, i32** %a + %cmp = icmp ne i32* %load, null + tail call void @llvm.assume(i1 %cmp) + tail call void @escape(i32* %load) + %rval = icmp eq i32* %load, null + ret i1 %rval +} + +; Make sure the above canonicalization applies only +; to pointer types. Doing otherwise would be illegal. 
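+; (!nonnull is only defined as load metadata for loads of pointer type, so the
+; integer variant below has to keep the icmp and the assume.)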
+ +define i1 @nonnull2(i32* %a) { +; CHECK-LABEL: @nonnull2( +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A:%.*]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LOAD]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i1 false +; + %load = load i32, i32* %a + %cmp = icmp ne i32 %load, 0 + tail call void @llvm.assume(i1 %cmp) + %rval = icmp eq i32 %load, 0 + ret i1 %rval +} + +; Make sure the above canonicalization does not trigger +; if the assume is control dependent on something else + +define i1 @nonnull3(i32** %a, i1 %control) { +; CHECK-LABEL: @nonnull3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32* [[LOAD]], null +; CHECK-NEXT: br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]] +; CHECK: taken: +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i1 false +; CHECK: not_taken: +; CHECK-NEXT: [[RVAL_2:%.*]] = icmp sgt i32* [[LOAD]], null +; CHECK-NEXT: ret i1 [[RVAL_2]] +; +entry: + %load = load i32*, i32** %a + %cmp = icmp ne i32* %load, null + br i1 %control, label %taken, label %not_taken +taken: + tail call void @llvm.assume(i1 %cmp) + %rval = icmp eq i32* %load, null + ret i1 %rval +not_taken: + %rval.2 = icmp sgt i32* %load, null + ret i1 %rval.2 +} + +; Make sure the above canonicalization does not trigger +; if the path from the load to the assume is potentially +; interrupted by an exception being thrown + +define i1 @nonnull4(i32** %a) { +; CHECK-LABEL: @nonnull4( +; CHECK-NEXT: [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8 +; CHECK-NEXT: tail call void @escape(i32* [[LOAD]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32* [[LOAD]], null +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i1 false +; + %load = load i32*, i32** %a + ;; This call may throw! + tail call void @escape(i32* %load) + %cmp = icmp ne i32* %load, null + tail call void @llvm.assume(i1 %cmp) + %rval = icmp eq i32* %load, null + ret i1 %rval +} +define i1 @nonnull5(i32** %a) { +; CHECK-LABEL: @nonnull5( +; CHECK-NEXT: [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8 +; CHECK-NEXT: tail call void @escape(i32* [[LOAD]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32* [[LOAD]], null +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i1 false +; + %load = load i32*, i32** %a + ;; This call may throw! + tail call void @escape(i32* %load) + %integral = ptrtoint i32* %load to i64 + %cmp = icmp slt i64 %integral, 0 + tail call void @llvm.assume(i1 %cmp) ; %load has at least highest bit set + %rval = icmp eq i32* %load, null + ret i1 %rval +} + +; PR35846 - https://bugs.llvm.org/show_bug.cgi?id=35846 + +define i32 @assumption_conflicts_with_known_bits(i32 %a, i32 %b) { +; CHECK-LABEL: @assumption_conflicts_with_known_bits( +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B:%.*]], 3 +; CHECK-NEXT: tail call void @llvm.assume(i1 false) +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[AND1]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP2]]) +; CHECK-NEXT: ret i32 0 +; + %and1 = and i32 %b, 3 + %B1 = lshr i32 %and1, %and1 + %B3 = shl nuw nsw i32 %and1, %B1 + %cmp = icmp eq i32 %B3, 1 + tail call void @llvm.assume(i1 %cmp) + %cmp2 = icmp eq i32 %B1, %B3 + tail call void @llvm.assume(i1 %cmp2) + ret i32 %and1 +} + +; PR37726 - https://bugs.llvm.org/show_bug.cgi?id=37726 +; There's a loophole in eliminating a redundant assumption when +; we have conflicting assumptions. Verify that debuginfo doesn't +; get in the way of the fold. 
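+; (The interleaved llvm.dbg.value calls are not real uses of the conditions;
+; the redundant-assume elimination is expected to skip over them.)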
+ +define void @debug_interference(i8 %x) { +; CHECK-LABEL: @debug_interference( +; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 false) +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, [[META7:metadata !.*]], metadata !DIExpression()), [[DBG9:!dbg !.*]] +; CHECK-NEXT: tail call void @llvm.assume(i1 false) +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, [[META7]], metadata !DIExpression()), [[DBG9]] +; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, [[META7]], metadata !DIExpression()), [[DBG9]] +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP2]]) +; CHECK-NEXT: ret void +; + %cmp1 = icmp eq i8 %x, 0 + %cmp2 = icmp ne i8 %x, 0 + tail call void @llvm.assume(i1 %cmp1) + tail call void @llvm.dbg.value(metadata i32 5, metadata !1, metadata !DIExpression()), !dbg !9 + tail call void @llvm.assume(i1 %cmp1) + tail call void @llvm.dbg.value(metadata i32 5, metadata !1, metadata !DIExpression()), !dbg !9 + tail call void @llvm.assume(i1 %cmp2) + tail call void @llvm.dbg.value(metadata i32 5, metadata !1, metadata !DIExpression()), !dbg !9 + tail call void @llvm.assume(i1 %cmp2) + ret void +} + +; This would crash. +; Does it ever make sense to peek through a bitcast of the icmp operand? + +define i32 @PR40940(<4 x i8> %x) { +; CHECK-LABEL: @PR40940( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[T2:%.*]] = bitcast <4 x i8> [[SHUF]] to i32 +; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 65536 +; CHECK-NEXT: call void @llvm.assume(i1 [[T3]]) +; CHECK-NEXT: ret i32 [[T2]] +; + %shuf = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> + %t2 = bitcast <4 x i8> %shuf to i32 + %t3 = icmp ult i32 %t2, 65536 + call void @llvm.assume(i1 %t3) + ret i32 %t2 +} + +define i1 @nonnull3A(i32** %a, i1 %control) { +; CHECK-LABEL: @nonnull3A( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8 +; CHECK-NEXT: br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]] +; CHECK: taken: +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32* [[LOAD]], null +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i1 true +; CHECK: not_taken: +; CHECK-NEXT: [[RVAL_2:%.*]] = icmp sgt i32* [[LOAD]], null +; CHECK-NEXT: ret i1 [[RVAL_2]] +; +entry: + %load = load i32*, i32** %a + %cmp = icmp ne i32* %load, null + br i1 %control, label %taken, label %not_taken +taken: + call void @llvm.assume(i1 %cmp) + ret i1 %cmp +not_taken: + call void @llvm.assume(i1 %cmp) + %rval.2 = icmp sgt i32* %load, null + ret i1 %rval.2 +} + +define i1 @nonnull3B(i32** %a, i1 %control) { +; CHECK-LABEL: @nonnull3B( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]] +; CHECK: taken: +; CHECK-NEXT: [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32* [[LOAD]], null +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) [ "nonnull"(i32* [[LOAD]]), "nonnull"(i1 [[CMP]]) ] +; CHECK-NEXT: ret i1 true +; CHECK: not_taken: +; CHECK-NEXT: ret i1 [[CONTROL]] +; +entry: + %load = load i32*, i32** %a + %cmp = icmp ne i32* %load, null + br i1 %control, label %taken, label %not_taken +taken: + call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load), "nonnull"(i1 %cmp)] + ret i1 %cmp +not_taken: + call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load), "nonnull"(i1 %cmp)] + ret i1 %control +} + +declare i1 @tmp1(i1) + +define i1 @nonnull3C(i32** %a, i1 %control) { +; CHECK-LABEL: 
@nonnull3C( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]] +; CHECK: taken: +; CHECK-NEXT: [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32* [[LOAD]], null +; CHECK-NEXT: [[CMP2:%.*]] = call i1 @tmp1(i1 [[CMP]]) +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: not_taken: +; CHECK-NEXT: ret i1 [[CONTROL]] +; +entry: + %load = load i32*, i32** %a + %cmp = icmp ne i32* %load, null + br i1 %control, label %taken, label %not_taken +taken: + %cmp2 = call i1 @tmp1(i1 %cmp) + br label %exit +exit: + ; FIXME: this shouldn't be dropped because it is still dominated by the new position of %load + call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load), "nonnull"(i1 %cmp)] + ret i1 %cmp2 +not_taken: + call void @llvm.assume(i1 %cmp) + ret i1 %control +} + +define i1 @nonnull3D(i32** %a, i1 %control) { +; CHECK-LABEL: @nonnull3D( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]] +; CHECK: taken: +; CHECK-NEXT: [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32* [[LOAD]], null +; CHECK-NEXT: [[CMP2:%.*]] = call i1 @tmp1(i1 [[CMP]]) +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: not_taken: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "ignore"(i32* undef), "ignore"(i1 undef), "nonnull"(i1 [[CONTROL]]) ] +; CHECK-NEXT: ret i1 [[CONTROL]] +; +entry: + %load = load i32*, i32** %a + %cmp = icmp ne i32* %load, null + br i1 %control, label %taken, label %not_taken +taken: + %cmp2 = call i1 @tmp1(i1 %cmp) + br label %exit +exit: + ret i1 %cmp2 +not_taken: + call void @llvm.assume(i1 %cmp) ["nonnull"(i32* %load), "nonnull"(i1 %cmp), "nonnull"(i1 %control)] + ret i1 %control +} + + +define void @always_true_assumption() { +; CHECK-LABEL: @always_true_assumption( +; CHECK-NEXT: ret void +; + call void @llvm.assume(i1 true) + ret void +} + +; The alloca guarantees that the low bits of %a are zero because of alignment. +; The assume says the opposite. Make sure we don't crash. + +define i64 @PR31809() { +; CHECK-LABEL: @PR31809( +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[T1:%.*]] = ptrtoint i32* [[A]] to i64 +; CHECK-NEXT: call void @llvm.assume(i1 false) +; CHECK-NEXT: ret i64 [[T1]] +; + %a = alloca i32 + %t1 = ptrtoint i32* %a to i64 + %cond = icmp eq i64 %t1, 3 + call void @llvm.assume(i1 %cond) + ret i64 %t1 +} + +; Similar to above: there's no way to know which assumption is truthful, +; so just don't crash. + +define i8 @conflicting_assumptions(i8 %x){ +; CHECK-LABEL: @conflicting_assumptions( +; CHECK-NEXT: call void @llvm.assume(i1 false) +; CHECK-NEXT: [[COND2:%.*]] = icmp eq i8 [[X:%.*]], 4 +; CHECK-NEXT: call void @llvm.assume(i1 [[COND2]]) +; CHECK-NEXT: ret i8 5 +; + %add = add i8 %x, 1 + %cond1 = icmp eq i8 %x, 3 + call void @llvm.assume(i1 %cond1) + %cond2 = icmp eq i8 %x, 4 + call void @llvm.assume(i1 %cond2) + ret i8 %add +} + +; Another case of conflicting assumptions. This would crash because we'd +; try to set more known bits than existed in the known bits struct. 
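+; (%B7 is 0x80000000, and the two assumes contradict each other: if %B12 == 0,
+; then %and1 <u %B12 can never hold, since no value is unsigned-less-than zero.)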
+ +define void @PR36270(i32 %b) { +; CHECK-LABEL: @PR36270( +; CHECK-NEXT: unreachable +; + %B7 = xor i32 -1, 2147483647 + %and1 = and i32 %b, 3 + %B12 = lshr i32 %B7, %and1 + %C1 = icmp ult i32 %and1, %B12 + tail call void @llvm.assume(i1 %C1) + %cmp2 = icmp eq i32 0, %B12 + tail call void @llvm.assume(i1 %cmp2) + unreachable +} + +; PR47416 + +define i32 @unreachable_assume(i32 %x, i32 %y) { +; CHECK-LABEL: @unreachable_assume( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP0]], [[CMP1]] +; CHECK-NEXT: tail call void @llvm.assume(i1 [[OR]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: br i1 [[CMP2]], label [[IF:%.*]], label [[EXIT:%.*]] +; CHECK: if: +; CHECK-NEXT: [[A:%.*]] = and i32 [[Y]], -2 +; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[A]], 104 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP3]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: unreachable +; +entry: + %cmp0 = icmp sgt i32 %x, 1 + %cmp1 = icmp eq i32 %y, 1 + %or = or i1 %cmp0, %cmp1 + tail call void @llvm.assume(i1 %or) + %cmp2 = icmp eq i32 %x, 1 + br i1 %cmp2, label %if, label %exit + +if: + %a = and i32 %y, -2 + %cmp3 = icmp ne i32 %a, 104 + tail call void @llvm.assume(i1 %cmp3) + br label %exit + +exit: + %cmp4 = icmp eq i32 %x, 2 + tail call void @llvm.assume(i1 %cmp4) + unreachable +} + +define i32 @unreachable_assumes_and_store(i32 %x, i32 %y, i32* %p) { +; CHECK-LABEL: @unreachable_assumes_and_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP0]], [[CMP1]] +; CHECK-NEXT: tail call void @llvm.assume(i1 [[OR]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X]], 1 +; CHECK-NEXT: br i1 [[CMP2]], label [[IF:%.*]], label [[EXIT:%.*]] +; CHECK: if: +; CHECK-NEXT: [[A:%.*]] = and i32 [[Y]], -2 +; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[A]], 104 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP3]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: unreachable +; +entry: + %cmp0 = icmp sgt i32 %x, 1 + %cmp1 = icmp eq i32 %y, 1 + %or = or i1 %cmp0, %cmp1 + tail call void @llvm.assume(i1 %or) + %cmp2 = icmp eq i32 %x, 1 + br i1 %cmp2, label %if, label %exit + +if: + %a = and i32 %y, -2 + %cmp3 = icmp ne i32 %a, 104 + tail call void @llvm.assume(i1 %cmp3) + br label %exit + +exit: + %cmp4 = icmp eq i32 %x, 2 + tail call void @llvm.assume(i1 %cmp4) + %cmp5 = icmp ugt i32 %y, 42 + tail call void @llvm.assume(i1 %cmp5) + store i32 %x, i32* %p + unreachable +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!5, !6, !7, !8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "Me", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: null, retainedTypes: null, imports: null) +!1 = !DILocalVariable(name: "", arg: 1, scope: !2, file: null, line: 1, type: null) +!2 = distinct !DISubprogram(name: "debug", linkageName: "debug", scope: null, file: null, line: 0, type: null, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!3 = !DIFile(filename: "consecutive-fences.ll", directory: "") +!5 = !{i32 2, !"Dwarf Version", i32 4} +!6 = !{i32 2, !"Debug Info Version", i32 3} +!7 = !{i32 1, !"wchar_size", i32 4} +!8 = !{i32 7, !"PIC Level", i32 2} +!9 = !DILocation(line: 0, column: 0, 
scope: !2) + + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } + diff --git a/llvm/test/Transforms/InstCombine/bswap-inseltpoison.ll b/llvm/test/Transforms/InstCombine/bswap-inseltpoison.ll new file mode 100644 index 0000000..3730496 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/bswap-inseltpoison.ll @@ -0,0 +1,867 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" + +define i32 @test1(i32 %i) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[T12:%.*]] = call i32 @llvm.bswap.i32(i32 [[I:%.*]]) +; CHECK-NEXT: ret i32 [[T12]] +; + %t1 = lshr i32 %i, 24 + %t3 = lshr i32 %i, 8 + %t4 = and i32 %t3, 65280 + %t5 = or i32 %t1, %t4 + %t7 = shl i32 %i, 8 + %t8 = and i32 %t7, 16711680 + %t9 = or i32 %t5, %t8 + %t11 = shl i32 %i, 24 + %t12 = or i32 %t9, %t11 + ret i32 %t12 +} + +define <2 x i32> @test1_vector(<2 x i32> %i) { +; CHECK-LABEL: @test1_vector( +; CHECK-NEXT: [[T12:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[I:%.*]]) +; CHECK-NEXT: ret <2 x i32> [[T12]] +; + %t1 = lshr <2 x i32> %i, + %t3 = lshr <2 x i32> %i, + %t4 = and <2 x i32> %t3, + %t5 = or <2 x i32> %t1, %t4 + %t7 = shl <2 x i32> %i, + %t8 = and <2 x i32> %t7, + %t9 = or <2 x i32> %t5, %t8 + %t11 = shl <2 x i32> %i, + %t12 = or <2 x i32> %t9, %t11 + ret <2 x i32> %t12 +} + +define i32 @test2(i32 %arg) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: [[T14:%.*]] = call i32 @llvm.bswap.i32(i32 [[ARG:%.*]]) +; CHECK-NEXT: ret i32 [[T14]] +; + %t2 = shl i32 %arg, 24 + %t4 = shl i32 %arg, 8 + %t5 = and i32 %t4, 16711680 + %t6 = or i32 %t2, %t5 + %t8 = lshr i32 %arg, 8 + %t9 = and i32 %t8, 65280 + %t10 = or i32 %t6, %t9 + %t12 = lshr i32 %arg, 24 + %t14 = or i32 %t10, %t12 + ret i32 %t14 +} + +define <2 x i32> @test2_vector(<2 x i32> %arg) { +; CHECK-LABEL: @test2_vector( +; CHECK-NEXT: [[T14:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[ARG:%.*]]) +; CHECK-NEXT: ret <2 x i32> [[T14]] +; + %t2 = shl <2 x i32> %arg, + %t4 = shl <2 x i32> %arg, + %t5 = and <2 x i32> %t4, + %t6 = or <2 x i32> %t2, %t5 + %t8 = lshr <2 x i32> %arg, + %t9 = and <2 x i32> %t8, + %t10 = or <2 x i32> %t6, %t9 + %t12 = lshr <2 x i32> %arg, + %t14 = or <2 x i32> %t10, %t12 + ret <2 x i32> %t14 +} + +define <2 x i32> @test2_vector_undef(<2 x i32> %arg) { +; CHECK-LABEL: @test2_vector_undef( +; CHECK-NEXT: [[T2:%.*]] = shl <2 x i32> [[ARG:%.*]], +; CHECK-NEXT: [[T4:%.*]] = shl <2 x i32> [[ARG]], +; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], +; CHECK-NEXT: [[T6:%.*]] = or <2 x i32> [[T2]], [[T5]] +; CHECK-NEXT: [[T8:%.*]] = lshr <2 x i32> [[ARG]], +; CHECK-NEXT: [[T9:%.*]] = and <2 x i32> [[T8]], +; CHECK-NEXT: [[T10:%.*]] = or <2 x i32> [[T6]], [[T9]] +; CHECK-NEXT: [[T12:%.*]] = lshr <2 x i32> [[ARG]], +; CHECK-NEXT: [[T14:%.*]] = or <2 x i32> [[T10]], [[T12]] +; CHECK-NEXT: ret <2 x i32> [[T14]] +; + %t2 = shl <2 x i32> %arg, + %t4 = shl <2 x i32> %arg, + %t5 = and <2 x i32> %t4, + %t6 = or <2 x i32> %t2, %t5 + %t8 = lshr <2 x i32> %arg, + %t9 = and <2 x i32> %t8, + %t10 = or <2 x i32> %t6, %t9 + %t12 = lshr <2 x i32> %arg, + %t14 = or <2 x i32> %t10, %t12 + ret <2 x i32> %t14 +} + +define i16 @test3(i16 %s) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[T5:%.*]] = call i16 @llvm.bswap.i16(i16 [[S:%.*]]) +; CHECK-NEXT: ret i16 [[T5]] +; + %t2 = lshr i16 %s, 8 + %t4 = shl i16 %s, 8 + %t5 = or i16 %t2, %t4 + ret i16 
%t5 +} + +define <2 x i16> @test3_vector(<2 x i16> %s) { +; CHECK-LABEL: @test3_vector( +; CHECK-NEXT: [[T5:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[S:%.*]]) +; CHECK-NEXT: ret <2 x i16> [[T5]] +; + %t2 = lshr <2 x i16> %s, + %t4 = shl <2 x i16> %s, + %t5 = or <2 x i16> %t2, %t4 + ret <2 x i16> %t5 +} + +define <2 x i16> @test3_vector_undef(<2 x i16> %s) { +; CHECK-LABEL: @test3_vector_undef( +; CHECK-NEXT: [[T5:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[S:%.*]]) +; CHECK-NEXT: ret <2 x i16> [[T5]] +; + %t2 = lshr <2 x i16> %s, + %t4 = shl <2 x i16> %s, + %t5 = or <2 x i16> %t2, %t4 + ret <2 x i16> %t5 +} + +define i16 @test4(i16 %s) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: [[T5:%.*]] = call i16 @llvm.bswap.i16(i16 [[S:%.*]]) +; CHECK-NEXT: ret i16 [[T5]] +; + %t2 = lshr i16 %s, 8 + %t4 = shl i16 %s, 8 + %t5 = or i16 %t4, %t2 + ret i16 %t5 +} + +define <2 x i16> @test4_vector(<2 x i16> %s) { +; CHECK-LABEL: @test4_vector( +; CHECK-NEXT: [[T5:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[S:%.*]]) +; CHECK-NEXT: ret <2 x i16> [[T5]] +; + %t2 = lshr <2 x i16> %s, + %t4 = shl <2 x i16> %s, + %t5 = or <2 x i16> %t4, %t2 + ret <2 x i16> %t5 +} + +define i16 @test5(i16 %a) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: [[T_UPGRD_3:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) +; CHECK-NEXT: ret i16 [[T_UPGRD_3]] +; + %t = zext i16 %a to i32 + %t1 = and i32 %t, 65280 + %t2 = ashr i32 %t1, 8 + %t2.upgrd.1 = trunc i32 %t2 to i16 + %t4 = and i32 %t, 255 + %t5 = shl i32 %t4, 8 + %t5.upgrd.2 = trunc i32 %t5 to i16 + %t.upgrd.3 = or i16 %t2.upgrd.1, %t5.upgrd.2 + %t6 = bitcast i16 %t.upgrd.3 to i16 + %t6.upgrd.4 = zext i16 %t6 to i32 + %retval = trunc i32 %t6.upgrd.4 to i16 + ret i16 %retval +} + +define <2 x i16> @test5_vector(<2 x i16> %a) { +; CHECK-LABEL: @test5_vector( +; CHECK-NEXT: [[T_UPGRD_3:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[A:%.*]]) +; CHECK-NEXT: ret <2 x i16> [[T_UPGRD_3]] +; + %t = zext <2 x i16> %a to <2 x i32> + %t1 = and <2 x i32> %t, + %t2 = ashr <2 x i32> %t1, + %t2.upgrd.1 = trunc <2 x i32> %t2 to <2 x i16> + %t4 = and <2 x i32> %t, + %t5 = shl <2 x i32> %t4, + %t5.upgrd.2 = trunc <2 x i32> %t5 to <2 x i16> + %t.upgrd.3 = or <2 x i16> %t2.upgrd.1, %t5.upgrd.2 + %t6 = bitcast <2 x i16> %t.upgrd.3 to <2 x i16> + %t6.upgrd.4 = zext <2 x i16> %t6 to <2 x i32> + %retval = trunc <2 x i32> %t6.upgrd.4 to <2 x i16> + ret <2 x i16> %retval +} + +; PR2842 +define i32 @test6(i32 %x) nounwind readnone { +; CHECK-LABEL: @test6( +; CHECK-NEXT: [[T7:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[T7]] +; + %t = shl i32 %x, 16 + %x.mask = and i32 %x, 65280 + %t1 = lshr i32 %x, 16 + %t2 = and i32 %t1, 255 + %t3 = or i32 %x.mask, %t + %t4 = or i32 %t3, %t2 + %t5 = shl i32 %t4, 8 + %t6 = lshr i32 %x, 24 + %t7 = or i32 %t5, %t6 + ret i32 %t7 +} + +define <2 x i32> @test6_vector(<2 x i32> %x) nounwind readnone { +; CHECK-LABEL: @test6_vector( +; CHECK-NEXT: [[T7:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]]) +; CHECK-NEXT: ret <2 x i32> [[T7]] +; + %t = shl <2 x i32> %x, + %x.mask = and <2 x i32> %x, + %t1 = lshr <2 x i32> %x, + %t2 = and <2 x i32> %t1, + %t3 = or <2 x i32> %x.mask, %t + %t4 = or <2 x i32> %t3, %t2 + %t5 = shl <2 x i32> %t4, + %t6 = lshr <2 x i32> %x, + %t7 = or <2 x i32> %t5, %t6 + ret <2 x i32> %t7 +} + +declare void @extra_use(i32) + +; swaphalf = (x << 16 | x >> 16) +; ((swaphalf & 0x00ff00ff) << 8) | ((swaphalf >> 8) & 0x00ff00ff) + +define i32 @bswap32_and_first(i32 %x) { +; CHECK-LABEL: 
@bswap32_and_first( +; CHECK-NEXT: [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[BSWAP]] +; + %shl = shl i32 %x, 16 + %shr = lshr i32 %x, 16 + %swaphalf = or i32 %shl, %shr + %t = and i32 %swaphalf, 16711935 + %tshl = shl nuw i32 %t, 8 + %b = lshr i32 %swaphalf, 8 + %band = and i32 %b, 16711935 + %bswap = or i32 %tshl, %band + ret i32 %bswap +} + +; Extra use should not prevent matching to bswap. +; swaphalf = (x << 16 | x >> 16) +; ((swaphalf & 0x00ff00ff) << 8) | ((swaphalf >> 8) & 0x00ff00ff) + +define i32 @bswap32_and_first_extra_use(i32 %x) { +; CHECK-LABEL: @bswap32_and_first_extra_use( +; CHECK-NEXT: [[SWAPHALF:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 16) +; CHECK-NEXT: [[T:%.*]] = and i32 [[SWAPHALF]], 16711935 +; CHECK-NEXT: [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 [[X]]) +; CHECK-NEXT: call void @extra_use(i32 [[T]]) +; CHECK-NEXT: ret i32 [[BSWAP]] +; + %shl = shl i32 %x, 16 + %shr = lshr i32 %x, 16 + %swaphalf = or i32 %shl, %shr + %t = and i32 %swaphalf, 16711935 + %tshl = shl nuw i32 %t, 8 + %b = lshr i32 %swaphalf, 8 + %band = and i32 %b, 16711935 + %bswap = or i32 %tshl, %band + call void @extra_use(i32 %t) + ret i32 %bswap +} + +; swaphalf = (x << 16 | x >> 16) +; ((swaphalf << 8) & 0xff00ff00) | ((swaphalf >> 8) & 0x00ff00ff) + +; PR23863 +define i32 @bswap32_shl_first(i32 %x) { +; CHECK-LABEL: @bswap32_shl_first( +; CHECK-NEXT: [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[BSWAP]] +; + %shl = shl i32 %x, 16 + %shr = lshr i32 %x, 16 + %swaphalf = or i32 %shl, %shr + %t = shl i32 %swaphalf, 8 + %tand = and i32 %t, -16711936 + %b = lshr i32 %swaphalf, 8 + %band = and i32 %b, 16711935 + %bswap = or i32 %tand, %band + ret i32 %bswap +} + +; Extra use should not prevent matching to bswap. 
+; swaphalf = (x << 16 | x >> 16) +; ((swaphalf << 8) & 0xff00ff00) | ((swaphalf >> 8) & 0x00ff00ff) + +define i32 @bswap32_shl_first_extra_use(i32 %x) { +; CHECK-LABEL: @bswap32_shl_first_extra_use( +; CHECK-NEXT: [[SWAPHALF:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 16) +; CHECK-NEXT: [[T:%.*]] = shl i32 [[SWAPHALF]], 8 +; CHECK-NEXT: [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 [[X]]) +; CHECK-NEXT: call void @extra_use(i32 [[T]]) +; CHECK-NEXT: ret i32 [[BSWAP]] +; + %shl = shl i32 %x, 16 + %shr = lshr i32 %x, 16 + %swaphalf = or i32 %shl, %shr + %t = shl i32 %swaphalf, 8 + %tand = and i32 %t, -16711936 + %b = lshr i32 %swaphalf, 8 + %band = and i32 %b, 16711935 + %bswap = or i32 %tand, %band + call void @extra_use(i32 %t) + ret i32 %bswap +} + +define i16 @test8(i16 %a) { +; CHECK-LABEL: @test8( +; CHECK-NEXT: [[OR:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) +; CHECK-NEXT: ret i16 [[OR]] +; + %conv = zext i16 %a to i32 + %shr = lshr i16 %a, 8 + %shl = shl i32 %conv, 8 + %conv1 = zext i16 %shr to i32 + %or = or i32 %conv1, %shl + %conv2 = trunc i32 %or to i16 + ret i16 %conv2 +} + +define i16 @test9(i16 %a) { +; CHECK-LABEL: @test9( +; CHECK-NEXT: [[OR:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) +; CHECK-NEXT: ret i16 [[OR]] +; + %conv = zext i16 %a to i32 + %shr = lshr i32 %conv, 8 + %shl = shl i32 %conv, 8 + %or = or i32 %shr, %shl + %conv2 = trunc i32 %or to i16 + ret i16 %conv2 +} + +define i16 @test10(i32 %a) { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[A:%.*]] to i16 +; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[TRUNC]]) +; CHECK-NEXT: ret i16 [[REV]] +; + %shr1 = lshr i32 %a, 8 + %and1 = and i32 %shr1, 255 + %and2 = shl i32 %a, 8 + %shl1 = and i32 %and2, 65280 + %or = or i32 %and1, %shl1 + %conv = trunc i32 %or to i16 + ret i16 %conv +} + +define <2 x i16> @test10_vector(<2 x i32> %a) { +; CHECK-LABEL: @test10_vector( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i32> [[A:%.*]] to <2 x i16> +; CHECK-NEXT: [[REV:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[TRUNC]]) +; CHECK-NEXT: ret <2 x i16> [[REV]] +; + %shr1 = lshr <2 x i32> %a, + %and1 = and <2 x i32> %shr1, + %and2 = shl <2 x i32> %a, + %shl1 = and <2 x i32> %and2, + %or = or <2 x i32> %and1, %shl1 + %conv = trunc <2 x i32> %or to <2 x i16> + ret <2 x i16> %conv +} + +define i64 @PR39793_bswap_u64_as_u32(i64 %0) { +; CHECK-LABEL: @PR39793_bswap_u64_as_u32( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[TMP0:%.*]] to i32 +; CHECK-NEXT: [[REV:%.*]] = call i32 @llvm.bswap.i32(i32 [[TRUNC]]) +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[REV]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] +; + %2 = lshr i64 %0, 24 + %3 = and i64 %2, 255 + %4 = lshr i64 %0, 8 + %5 = and i64 %4, 65280 + %6 = or i64 %3, %5 + %7 = shl i64 %0, 8 + %8 = and i64 %7, 16711680 + %9 = or i64 %6, %8 + %10 = shl i64 %0, 24 + %11 = and i64 %10, 4278190080 + %12 = or i64 %9, %11 + ret i64 %12 +} + +define i16 @PR39793_bswap_u64_as_u32_trunc(i64 %0) { +; CHECK-LABEL: @PR39793_bswap_u64_as_u32_trunc( +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[TMP0:%.*]] to i32 +; CHECK-NEXT: [[REV:%.*]] = call i32 @llvm.bswap.i32(i32 [[TRUNC]]) +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[REV]] to i16 +; CHECK-NEXT: ret i16 [[TMP2]] +; + %2 = lshr i64 %0, 24 + %3 = and i64 %2, 255 + %4 = lshr i64 %0, 8 + %5 = and i64 %4, 65280 + %6 = or i64 %3, %5 + %7 = shl i64 %0, 8 + %8 = and i64 %7, 16711680 + %9 = or i64 %6, %8 + %10 = shl i64 %0, 24 + %11 = and i64 %10, 4278190080 + %12 = or i64 %9, %11 + %13 = trunc i64 %12 to i16 + ret 
i16 %13
+}
+
+define i64 @PR39793_bswap_u64_as_u16(i64 %0) {
+; CHECK-LABEL: @PR39793_bswap_u64_as_u16(
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[TMP0:%.*]] to i16
+; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[TRUNC]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[REV]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %2 = lshr i64 %0, 8
+ %3 = and i64 %2, 255
+ %4 = shl i64 %0, 8
+ %5 = and i64 %4, 65280
+ %6 = or i64 %3, %5
+ ret i64 %6
+}
+
+define <2 x i64> @PR39793_bswap_u64_as_u16_vector(<2 x i64> %0) {
+; CHECK-LABEL: @PR39793_bswap_u64_as_u16_vector(
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[TMP0:%.*]] to <2 x i16>
+; CHECK-NEXT: [[REV:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[TRUNC]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[REV]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
+;
+ %2 = lshr <2 x i64> %0, <i64 8, i64 8>
+ %3 = and <2 x i64> %2, <i64 255, i64 255>
+ %4 = shl <2 x i64> %0, <i64 8, i64 8>
+ %5 = and <2 x i64> %4, <i64 65280, i64 65280>
+ %6 = or <2 x i64> %3, %5
+ ret <2 x i64> %6
+}
+
+define i8 @PR39793_bswap_u64_as_u16_trunc(i64 %0) {
+; CHECK-LABEL: @PR39793_bswap_u64_as_u16_trunc(
+; CHECK-NEXT: [[REV1:%.*]] = lshr i64 [[TMP0:%.*]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[REV1]] to i8
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %2 = lshr i64 %0, 8
+ %3 = and i64 %2, 255
+ %4 = shl i64 %0, 8
+ %5 = and i64 %4, 65280
+ %6 = or i64 %3, %5
+ %7 = trunc i64 %6 to i8
+ ret i8 %7
+}
+
+define i50 @PR39793_bswap_u50_as_u16(i50 %0) {
+; CHECK-LABEL: @PR39793_bswap_u50_as_u16(
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i50 [[TMP0:%.*]] to i16
+; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[TRUNC]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[REV]] to i50
+; CHECK-NEXT: ret i50 [[TMP2]]
+;
+ %2 = lshr i50 %0, 8
+ %3 = and i50 %2, 255
+ %4 = shl i50 %0, 8
+ %5 = and i50 %4, 65280
+ %6 = or i50 %3, %5
+ ret i50 %6
+}
+
+define i32 @PR39793_bswap_u32_as_u16(i32 %0) {
+; CHECK-LABEL: @PR39793_bswap_u32_as_u16(
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[TMP0:%.*]] to i16
+; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[TRUNC]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[REV]] to i32
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+ %2 = lshr i32 %0, 8
+ %3 = and i32 %2, 255
+ %4 = shl i32 %0, 8
+ %5 = and i32 %4, 65280
+ %6 = or i32 %3, %5
+ ret i32 %6
+}
+
+define i8 @PR39793_bswap_u32_as_u16_trunc(i32 %0) {
+; CHECK-LABEL: @PR39793_bswap_u32_as_u16_trunc(
+; CHECK-NEXT: [[REV1:%.*]] = lshr i32 [[TMP0:%.*]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[REV1]] to i8
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %2 = lshr i32 %0, 8
+ %3 = and i32 %2, 255
+ %4 = shl i32 %0, 8
+ %5 = and i32 %4, 65280
+ %6 = or i32 %3, %5
+ %7 = trunc i32 %6 to i8
+ ret i8 %7
+}
+
+define i32 @partial_bswap(i32 %x) {
+; CHECK-LABEL: @partial_bswap(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]])
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %x3 = shl i32 %x, 24
+ %a2 = shl i32 %x, 8
+ %x2 = and i32 %a2, 16711680
+ %x32 = or i32 %x3, %x2
+ %t1 = and i32 %x, -65536
+ %t2 = call i32 @llvm.bswap.i32(i32 %t1)
+ %r = or i32 %x32, %t2
+ ret i32 %r
+}
+declare i32 @llvm.bswap.i32(i32)
+
+define <2 x i32> @partial_bswap_vector(<2 x i32> %x) {
+; CHECK-LABEL: @partial_bswap_vector(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]])
+; CHECK-NEXT: ret <2 x i32> [[TMP1]]
+;
+ %x3 = shl <2 x i32> %x, <i32 24, i32 24>
+ %a2 = shl <2 x i32> %x, <i32 8, i32 8>
+ %x2 = and <2 x i32> %a2, <i32 16711680, i32 16711680>
+ %x32 = or <2 x i32> %x3, %x2
+ %t1 = and <2 x i32> %x, <i32 -65536, i32 -65536>
+ %t2 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %t1)
+ %r = or <2 x i32> %x32, %t2
+ ret <2 x i32> %r
+}
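+; In the two partial_bswap tests above, (x << 24) and ((x << 8) & 0x00FF0000)
+; place the two low input bytes, while bswap(x & 0xFFFF0000) places the two
+; high input bytes, so the combined pattern is a complete byte swap.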
+declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) + +define i16 @partial_bitreverse(i16 %x) { +; CHECK-LABEL: @partial_bitreverse( +; CHECK-NEXT: [[OR:%.*]] = call i16 @llvm.bswap.i16(i16 [[X:%.*]]) +; CHECK-NEXT: ret i16 [[OR]] +; + %rev= call i16 @llvm.bitreverse.i16(i16 %x) + %lo = and i16 %rev, 255 + %hi = and i16 %rev, -256 + %revlo = call i16 @llvm.bitreverse.i16(i16 %lo) + %revhi = call i16 @llvm.bitreverse.i16(i16 %hi) + %newlo = lshr i16 %revlo, 8 + %newhi = shl i16 %revhi, 8 + %or = or i16 %newlo, %newhi + ret i16 %or +} +declare i16 @llvm.bitreverse.i16(i16) + +define i64 @bswap_and_mask_0(i64 %0) { +; CHECK-LABEL: @bswap_and_mask_0( +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037927681 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] +; + %2 = lshr i64 %0, 56 + %3 = shl i64 %0, 56 + %4 = or i64 %2, %3 + ret i64 %4 +} + +define i64 @bswap_and_mask_1(i64 %0) { +; CHECK-LABEL: @bswap_and_mask_1( +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 56 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP0]], 40 +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP3]], 65280 +; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP4]], [[TMP2]] +; CHECK-NEXT: ret i64 [[TMP5]] +; + %2 = lshr i64 %0, 56 + %3 = lshr i64 %0, 40 + %4 = and i64 %3, 65280 + %5 = or i64 %4, %2 + ret i64 %5 +} + +define i64 @bswap_and_mask_2(i64 %0) { +; CHECK-LABEL: @bswap_and_mask_2( +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0:%.*]], -72057594037862401 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] +; + %2 = lshr i64 %0, 56 + %3 = shl i64 %0, 56 + %4 = or i64 %2, %3 + %5 = shl i64 %0, 40 + %6 = and i64 %5, 71776119061217280 + %7 = or i64 %4, %6 + ret i64 %7 +} + +define i64 @bswap_trunc(i64 %x01234567) { +; CHECK-LABEL: @bswap_trunc( +; CHECK-NEXT: [[X7ZZZZZZZ:%.*]] = shl i64 [[X01234567:%.*]], 56 +; CHECK-NEXT: [[XZ0123456:%.*]] = lshr i64 [[X01234567]], 8 +; CHECK-NEXT: [[XZZZZZ012:%.*]] = lshr i64 [[X01234567]], 40 +; CHECK-NEXT: [[X3456:%.*]] = trunc i64 [[XZ0123456]] to i32 +; CHECK-NEXT: [[XZ012:%.*]] = trunc i64 [[XZZZZZ012]] to i32 +; CHECK-NEXT: [[X6543:%.*]] = call i32 @llvm.bswap.i32(i32 [[X3456]]) +; CHECK-NEXT: [[X210Z:%.*]] = call i32 @llvm.bswap.i32(i32 [[XZ012]]) +; CHECK-NEXT: [[XZ210:%.*]] = lshr exact i32 [[X210Z]], 8 +; CHECK-NEXT: [[XZZZZ6543:%.*]] = zext i32 [[X6543]] to i64 +; CHECK-NEXT: [[XZZZZZ210:%.*]] = zext i32 [[XZ210]] to i64 +; CHECK-NEXT: [[XZ6543ZZZ:%.*]] = shl nuw nsw i64 [[XZZZZ6543]], 24 +; CHECK-NEXT: [[XZ6543210:%.*]] = or i64 [[XZ6543ZZZ]], [[XZZZZZ210]] +; CHECK-NEXT: [[X76543210:%.*]] = or i64 [[XZ6543210]], [[X7ZZZZZZZ]] +; CHECK-NEXT: ret i64 [[X76543210]] +; + %x7zzzzzzz = shl i64 %x01234567, 56 + %xz0123456 = lshr i64 %x01234567, 8 + %xzzzzz012 = lshr i64 %x01234567, 40 + %x3456 = trunc i64 %xz0123456 to i32 + %xz012 = trunc i64 %xzzzzz012 to i32 + %x6543 = call i32 @llvm.bswap.i32(i32 %x3456) + %x210z = call i32 @llvm.bswap.i32(i32 %xz012) + %xz210 = lshr i32 %x210z, 8 + %xzzzz6543 = zext i32 %x6543 to i64 + %xzzzzz210 = zext i32 %xz210 to i64 + %xz6543zzz = shl i64 %xzzzz6543, 24 + %xz6543210 = or i64 %xzzzzz210, %xz6543zzz + %x76543210 = or i64 %xz6543210, %x7zzzzzzz + ret i64 %x76543210 +} + +define i32 @shuf_4bytes(<4 x i8> %x) { +; CHECK-LABEL: @shuf_4bytes( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[X:%.*]] to i32 +; CHECK-NEXT: [[CAST:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) +; CHECK-NEXT: ret i32 [[CAST]] +; + %bswap = shufflevector <4 x i8> %x, <4 x i8> 
poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %cast = bitcast <4 x i8> %bswap to i32
+ ret i32 %cast
+}
+
+define i32 @shuf_load_4bytes(<4 x i8>* %p) {
+; CHECK-LABEL: @shuf_load_4bytes(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8>* [[P:%.*]] to i32*
+; CHECK-NEXT: [[X1:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT: [[CAST:%.*]] = call i32 @llvm.bswap.i32(i32 [[X1]])
+; CHECK-NEXT: ret i32 [[CAST]]
+;
+ %x = load <4 x i8>, <4 x i8>* %p
+ %bswap = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %cast = bitcast <4 x i8> %bswap to i32
+ ret i32 %cast
+}
+
+define i32 @shuf_bitcast_twice_4bytes(i32 %x) {
+; CHECK-LABEL: @shuf_bitcast_twice_4bytes(
+; CHECK-NEXT: [[CAST2:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]])
+; CHECK-NEXT: ret i32 [[CAST2]]
+;
+ %cast1 = bitcast i32 %x to <4 x i8>
+ %bswap = shufflevector <4 x i8> %cast1, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %cast2 = bitcast <4 x i8> %bswap to i32
+ ret i32 %cast2
+}
+
+; Negative test - extra use
+declare void @use(<4 x i8>)
+
+define i32 @shuf_4bytes_extra_use(<4 x i8> %x) {
+; CHECK-LABEL: @shuf_4bytes_extra_use(
+; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: call void @use(<4 x i8> [[BSWAP]])
+; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[BSWAP]] to i32
+; CHECK-NEXT: ret i32 [[CAST]]
+;
+ %bswap = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ call void @use(<4 x i8> %bswap)
+ %cast = bitcast <4 x i8> %bswap to i32
+ ret i32 %cast
+}
+
+; Negative test - scalar type is not in the data layout
+
+define i128 @shuf_16bytes(<16 x i8> %x) {
+; CHECK-LABEL: @shuf_16bytes(
+; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[CAST:%.*]] = bitcast <16 x i8> [[BSWAP]] to i128
+; CHECK-NEXT: ret i128 [[CAST]]
+;
+ %bswap = shufflevector <16 x i8> %x, <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ %cast = bitcast <16 x i8> %bswap to i128
+ ret i128 %cast
+}
+
+; Negative test - don't touch widening shuffles (for now)
+
+define i32 @shuf_2bytes_widening(<2 x i8> %x) {
+; CHECK-LABEL: @shuf_2bytes_widening(
+; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
+; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[BSWAP]] to i32
+; CHECK-NEXT: ret i32 [[CAST]]
+;
+ %bswap = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
+ %cast = bitcast <4 x i8> %bswap to i32
+ ret i32 %cast
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define i32 @funnel_unary(i32 %abcd) {
+; CHECK-LABEL: @funnel_unary(
+; CHECK-NEXT: [[DCBA:%.*]] = call i32 @llvm.bswap.i32(i32 [[ABCD:%.*]])
+; CHECK-NEXT: ret i32 [[DCBA]]
+;
+ %dabc = call i32 @llvm.fshl.i32(i32 %abcd, i32 %abcd, i32 24)
+ %bcda = call i32 @llvm.fshr.i32(i32 %abcd, i32 %abcd, i32 24)
+ %dzbz = and i32 %dabc, -16711936
+ %zcza = and i32 %bcda, 16711935
+ %dcba = or i32 %dzbz, %zcza
+ ret i32 %dcba
+}
+
+define i32 @funnel_binary(i32 %abcd) {
+; CHECK-LABEL: @funnel_binary(
+; CHECK-NEXT: [[DCBA:%.*]] = call i32 @llvm.bswap.i32(i32 [[ABCD:%.*]])
+; CHECK-NEXT: ret i32 [[DCBA]]
+;
+ %cdzz = shl i32 %abcd, 16
+ %dcdz = call i32 @llvm.fshl.i32(i32 %abcd, i32 %cdzz, i32 24)
+ %zzab = lshr i32 %abcd, 16
+ %zaba = call i32 @llvm.fshr.i32(i32 %zzab, i32 %abcd, i32 24)
+ %dczz = and i32 %dcdz, -65536
+ %zzba = and i32 %zaba, 65535
+ %dcba = or i32 %dczz, %zzba
+ ret i32 %dcba
+}
+
+define i32 @funnel_and(i32 %abcd) {
+; CHECK-LABEL: @funnel_and(
+; CHECK-NEXT: [[DCBA:%.*]] = call i32 @llvm.bswap.i32(i32 [[ABCD:%.*]])
+; CHECK-NEXT:
ret i32 [[DCBA]] +; + %zzcz = and i32 %abcd, 65280 + %zcza = call i32 @llvm.fshl.i32(i32 %zzcz, i32 %abcd, i32 8) + %zbzz = and i32 %abcd, 16711680 + %dzbz = call i32 @llvm.fshl.i32(i32 %abcd, i32 %zbzz, i32 24) + %dcba = or i32 %zcza, %dzbz + ret i32 %dcba +} + +; PR47191 - deep IR trees prevent ADD/XOR instructions being simplified to OR. + +define i64 @PR47191_problem1(i64 %0) { +; CHECK-LABEL: @PR47191_problem1( +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] +; + %2 = lshr i64 %0, 56 + %3 = lshr i64 %0, 40 + %4 = and i64 %3, 65280 + %5 = lshr i64 %0, 24 + %6 = and i64 %5, 16711680 + %7 = lshr i64 %0, 8 + %8 = and i64 %7, 4278190080 + %9 = shl i64 %0, 56 + %10 = shl i64 %0, 40 + %11 = and i64 %10, 71776119061217280 + %12 = shl i64 %0, 24 + %13 = and i64 %12, 280375465082880 + %14 = or i64 %9, %2 + %15 = or i64 %14, %4 + %16 = or i64 %15, %6 + %17 = or i64 %16, %8 + %18 = or i64 %17, %11 + %19 = or i64 %18, %13 + %20 = shl i64 %0, 8 + %21 = and i64 %20, 1095216660480 + %22 = add i64 %19, %21 + ret i64 %22 +} + +define i64 @PR47191_problem2(i64 %0) { +; CHECK-LABEL: @PR47191_problem2( +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] +; + %2 = lshr i64 %0, 56 + %3 = lshr i64 %0, 40 + %4 = and i64 %3, 65280 + %5 = lshr i64 %0, 24 + %6 = and i64 %5, 16711680 + %7 = lshr i64 %0, 8 + %8 = and i64 %7, 4278190080 + %9 = shl i64 %0, 56 + %10 = shl i64 %0, 40 + %11 = and i64 %10, 71776119061217280 + %12 = or i64 %9, %2 + %13 = or i64 %12, %4 + %14 = or i64 %13, %6 + %15 = or i64 %14, %8 + %16 = or i64 %15, %11 + %17 = shl i64 %0, 24 + %18 = and i64 %17, 280375465082880 + %19 = shl i64 %0, 8 + %20 = and i64 %19, 1095216660480 + %21 = or i64 %20, %18 + %22 = xor i64 %21, %16 + ret i64 %22 +} + +define i64 @PR47191_problem3(i64 %0) { +; CHECK-LABEL: @PR47191_problem3( +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] +; + %2 = lshr i64 %0, 56 + %3 = lshr i64 %0, 40 + %4 = and i64 %3, 65280 + %5 = lshr i64 %0, 24 + %6 = and i64 %5, 16711680 + %7 = lshr i64 %0, 8 + %8 = and i64 %7, 4278190080 + %9 = shl i64 %0, 56 + %10 = shl i64 %0, 40 + %11 = and i64 %10, 71776119061217280 + %12 = or i64 %9, %2 + %13 = or i64 %12, %4 + %14 = or i64 %13, %6 + %15 = or i64 %14, %8 + %16 = or i64 %15, %11 + %17 = shl i64 %0, 24 + %18 = and i64 %17, 280375465082880 + %19 = shl i64 %0, 8 + %20 = and i64 %19, 1095216660480 + %21 = or i64 %20, %18 + %22 = xor i64 %21, %16 + ret i64 %22 +} + +define i64 @PR47191_problem4(i64 %0) { +; CHECK-LABEL: @PR47191_problem4( +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP0:%.*]]) +; CHECK-NEXT: ret i64 [[TMP2]] +; + %2 = lshr i64 %0, 56 + %3 = shl i64 %0, 56 + %4 = or i64 %2, %3 + %5 = lshr i64 %0, 40 + %6 = and i64 %5, 65280 + %7 = or i64 %4, %6 + %8 = shl i64 %0, 40 + %9 = and i64 %8, 71776119061217280 + %10 = or i64 %7, %9 + %11 = lshr i64 %0, 24 + %12 = and i64 %11, 16711680 + %13 = or i64 %10, %12 + %14 = shl i64 %0, 24 + %15 = and i64 %14, 280375465082880 + %16 = or i64 %13, %15 + %17 = lshr i64 %0, 8 + %18 = and i64 %17, 4278190080 + %19 = or i64 %16, %18 + %20 = shl i64 %0, 8 + %21 = and i64 %20, 1095216660480 + %22 = add i64 %19, %21 + ret i64 %22 +} diff --git a/llvm/test/Transforms/InstCombine/extractelement-inseltpoison.ll b/llvm/test/Transforms/InstCombine/extractelement-inseltpoison.ll index d614bb0..82c1a5c 100644 --- a/llvm/test/Transforms/InstCombine/extractelement-inseltpoison.ll +++ 
b/llvm/test/Transforms/InstCombine/extractelement-inseltpoison.ll @@ -35,7 +35,7 @@ define i64 @test2(i64 %in) { ; ANY-NEXT: ret i64 [[IN:%.*]] ; %vec = insertelement <8 x i64> poison, i64 %in, i32 0 - %splat = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> zeroinitializer + %splat = shufflevector <8 x i64> %vec, <8 x i64> poison, <8 x i32> zeroinitializer %add = add <8 x i64> %splat, %r = extractelement <8 x i64> %add, i32 0 ret i64 %r diff --git a/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll b/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll new file mode 100644 index 0000000..083b7a6 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll @@ -0,0 +1,1176 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +; (-0.0 - X) * C => X * -C +define float @neg_constant(float %x) { +; CHECK-LABEL: @neg_constant( +; CHECK-NEXT: [[MUL:%.*]] = fmul ninf float [[X:%.*]], -2.000000e+01 +; CHECK-NEXT: ret float [[MUL]] +; + %sub = fsub float -0.0, %x + %mul = fmul ninf float %sub, 2.0e+1 + ret float %mul +} + +define float @unary_neg_constant(float %x) { +; CHECK-LABEL: @unary_neg_constant( +; CHECK-NEXT: [[MUL:%.*]] = fmul ninf float [[X:%.*]], -2.000000e+01 +; CHECK-NEXT: ret float [[MUL]] +; + %sub = fneg float %x + %mul = fmul ninf float %sub, 2.0e+1 + ret float %mul +} + +define <2 x float> @neg_constant_vec(<2 x float> %x) { +; CHECK-LABEL: @neg_constant_vec( +; CHECK-NEXT: [[MUL:%.*]] = fmul ninf <2 x float> [[X:%.*]], +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sub = fsub <2 x float> , %x + %mul = fmul ninf <2 x float> %sub, + ret <2 x float> %mul +} + +define <2 x float> @unary_neg_constant_vec(<2 x float> %x) { +; CHECK-LABEL: @unary_neg_constant_vec( +; CHECK-NEXT: [[MUL:%.*]] = fmul ninf <2 x float> [[X:%.*]], +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sub = fneg <2 x float> %x + %mul = fmul ninf <2 x float> %sub, + ret <2 x float> %mul +} + +define <2 x float> @neg_constant_vec_undef(<2 x float> %x) { +; CHECK-LABEL: @neg_constant_vec_undef( +; CHECK-NEXT: [[MUL:%.*]] = fmul ninf <2 x float> [[X:%.*]], +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sub = fsub <2 x float> , %x + %mul = fmul ninf <2 x float> %sub, + ret <2 x float> %mul +} + +; (0.0 - X) * C => X * -C +define float @neg_nsz_constant(float %x) { +; CHECK-LABEL: @neg_nsz_constant( +; CHECK-NEXT: [[MUL:%.*]] = fmul nnan float [[X:%.*]], -2.000000e+01 +; CHECK-NEXT: ret float [[MUL]] +; + %sub = fsub nsz float 0.0, %x + %mul = fmul nnan float %sub, 2.0e+1 + ret float %mul +} + +define float @unary_neg_nsz_constant(float %x) { +; CHECK-LABEL: @unary_neg_nsz_constant( +; CHECK-NEXT: [[MUL:%.*]] = fmul nnan float [[X:%.*]], -2.000000e+01 +; CHECK-NEXT: ret float [[MUL]] +; + %sub = fneg nsz float %x + %mul = fmul nnan float %sub, 2.0e+1 + ret float %mul +} + +; (-0.0 - X) * (-0.0 - Y) => X * Y +define float @neg_neg(float %x, float %y) { +; CHECK-LABEL: @neg_neg( +; CHECK-NEXT: [[MUL:%.*]] = fmul arcp float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret float [[MUL]] +; + %sub1 = fsub float -0.0, %x + %sub2 = fsub float -0.0, %y + %mul = fmul arcp float %sub1, %sub2 + ret float %mul +} + +define float @unary_neg_unary_neg(float %x, float %y) { +; CHECK-LABEL: @unary_neg_unary_neg( +; CHECK-NEXT: [[MUL:%.*]] = fmul arcp float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret float [[MUL]] +; + %sub1 = fneg float %x + %sub2 = fneg float %y + %mul = fmul arcp float %sub1, %sub2 + ret float %mul +} + +define float @unary_neg_neg(float 
%x, float %y) { +; CHECK-LABEL: @unary_neg_neg( +; CHECK-NEXT: [[MUL:%.*]] = fmul arcp float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret float [[MUL]] +; + %sub1 = fneg float %x + %sub2 = fsub float -0.0, %y + %mul = fmul arcp float %sub1, %sub2 + ret float %mul +} + +define float @neg_unary_neg(float %x, float %y) { +; CHECK-LABEL: @neg_unary_neg( +; CHECK-NEXT: [[MUL:%.*]] = fmul arcp float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret float [[MUL]] +; + %sub1 = fsub float -0.0, %x + %sub2 = fneg float %y + %mul = fmul arcp float %sub1, %sub2 + ret float %mul +} + +define <2 x float> @neg_neg_vec(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @neg_neg_vec( +; CHECK-NEXT: [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sub1 = fsub <2 x float> , %x + %sub2 = fsub <2 x float> , %y + %mul = fmul arcp <2 x float> %sub1, %sub2 + ret <2 x float> %mul +} + +define <2 x float> @unary_neg_unary_neg_vec(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @unary_neg_unary_neg_vec( +; CHECK-NEXT: [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sub1 = fneg <2 x float> %x + %sub2 = fneg <2 x float> %y + %mul = fmul arcp <2 x float> %sub1, %sub2 + ret <2 x float> %mul +} + +define <2 x float> @unary_neg_neg_vec(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @unary_neg_neg_vec( +; CHECK-NEXT: [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sub1 = fneg <2 x float> %x + %sub2 = fsub <2 x float> , %y + %mul = fmul arcp <2 x float> %sub1, %sub2 + ret <2 x float> %mul +} + +define <2 x float> @neg_unary_neg_vec(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @neg_unary_neg_vec( +; CHECK-NEXT: [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sub1 = fsub <2 x float> , %x + %sub2 = fneg <2 x float> %y + %mul = fmul arcp <2 x float> %sub1, %sub2 + ret <2 x float> %mul +} + +define <2 x float> @neg_neg_vec_undef(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @neg_neg_vec_undef( +; CHECK-NEXT: [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sub1 = fsub <2 x float> , %x + %sub2 = fsub <2 x float> , %y + %mul = fmul arcp <2 x float> %sub1, %sub2 + ret <2 x float> %mul +} + +define <2 x float> @unary_neg_neg_vec_undef(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @unary_neg_neg_vec_undef( +; CHECK-NEXT: [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %neg = fneg <2 x float> %x + %sub = fsub <2 x float> , %y + %mul = fmul arcp <2 x float> %neg, %sub + ret <2 x float> %mul +} + +define <2 x float> @neg_unary_neg_vec_undef(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @neg_unary_neg_vec_undef( +; CHECK-NEXT: [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sub = fsub <2 x float> , %x + %neg = fneg <2 x float> %y + %mul = fmul arcp <2 x float> %sub, %neg + ret <2 x float> %mul +} + +; (0.0 - X) * (0.0 - Y) => X * Y +define float @neg_neg_nsz(float %x, float %y) { +; CHECK-LABEL: @neg_neg_nsz( +; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret float [[MUL]] +; + %sub1 = fsub nsz float 0.0, %x + %sub2 = fsub nsz float 0.0, %y + %mul = fmul afn float %sub1, %sub2 + ret float %mul +} + +declare void @use_f32(float) + +define float @neg_neg_multi_use(float %x, float %y) { +; CHECK-LABEL: @neg_neg_multi_use( +; 
CHECK-NEXT: [[NX:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[NY:%.*]] = fneg float [[Y:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[X]], [[Y]] +; CHECK-NEXT: call void @use_f32(float [[NX]]) +; CHECK-NEXT: call void @use_f32(float [[NY]]) +; CHECK-NEXT: ret float [[MUL]] +; + %nx = fsub float -0.0, %x + %ny = fsub float -0.0, %y + %mul = fmul afn float %nx, %ny + call void @use_f32(float %nx) + call void @use_f32(float %ny) + ret float %mul +} + +define float @unary_neg_unary_neg_multi_use(float %x, float %y) { +; CHECK-LABEL: @unary_neg_unary_neg_multi_use( +; CHECK-NEXT: [[NX:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[NY:%.*]] = fneg float [[Y:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[X]], [[Y]] +; CHECK-NEXT: call void @use_f32(float [[NX]]) +; CHECK-NEXT: call void @use_f32(float [[NY]]) +; CHECK-NEXT: ret float [[MUL]] +; + %nx = fneg float %x + %ny = fneg float %y + %mul = fmul afn float %nx, %ny + call void @use_f32(float %nx) + call void @use_f32(float %ny) + ret float %mul +} + +define float @unary_neg_neg_multi_use(float %x, float %y) { +; CHECK-LABEL: @unary_neg_neg_multi_use( +; CHECK-NEXT: [[NX:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[NY:%.*]] = fneg float [[Y:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[X]], [[Y]] +; CHECK-NEXT: call void @use_f32(float [[NX]]) +; CHECK-NEXT: call void @use_f32(float [[NY]]) +; CHECK-NEXT: ret float [[MUL]] +; + %nx = fneg float %x + %ny = fsub float -0.0, %y + %mul = fmul afn float %nx, %ny + call void @use_f32(float %nx) + call void @use_f32(float %ny) + ret float %mul +} + +define float @neg_unary_neg_multi_use(float %x, float %y) { +; CHECK-LABEL: @neg_unary_neg_multi_use( +; CHECK-NEXT: [[NX:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[NY:%.*]] = fneg float [[Y:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fmul afn float [[X]], [[Y]] +; CHECK-NEXT: call void @use_f32(float [[NX]]) +; CHECK-NEXT: call void @use_f32(float [[NY]]) +; CHECK-NEXT: ret float [[MUL]] +; + %nx = fsub float -0.0, %x + %ny = fneg float %y + %mul = fmul afn float %nx, %ny + call void @use_f32(float %nx) + call void @use_f32(float %ny) + ret float %mul +} + +; (-0.0 - X) * Y +define float @neg_mul(float %x, float %y) { +; CHECK-LABEL: @neg_mul( +; CHECK-NEXT: [[SUB:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUB]], [[Y:%.*]] +; CHECK-NEXT: ret float [[MUL]] +; + %sub = fsub float -0.0, %x + %mul = fmul float %sub, %y + ret float %mul +} + +define float @unary_neg_mul(float %x, float %y) { +; CHECK-LABEL: @unary_neg_mul( +; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[NEG]], [[Y:%.*]] +; CHECK-NEXT: ret float [[MUL]] +; + %neg = fneg float %x + %mul = fmul float %neg, %y + ret float %mul +} + +define <2 x float> @neg_mul_vec(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @neg_mul_vec( +; CHECK-NEXT: [[SUB:%.*]] = fneg <2 x float> [[X:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[SUB]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sub = fsub <2 x float> , %x + %mul = fmul <2 x float> %sub, %y + ret <2 x float> %mul +} + +define <2 x float> @unary_neg_mul_vec(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @unary_neg_mul_vec( +; CHECK-NEXT: [[SUB:%.*]] = fneg <2 x float> [[X:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[SUB]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sub = fneg <2 x float> %x + %mul = fmul <2 x float> %sub, %y + ret <2 x float> %mul +} + +define <2 x float> @neg_mul_vec_undef(<2 x float> %x, <2 x float> %y) { +; 
CHECK-LABEL: @neg_mul_vec_undef( +; CHECK-NEXT: [[SUB:%.*]] = fneg <2 x float> [[X:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[SUB]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sub = fsub <2 x float> , %x + %mul = fmul <2 x float> %sub, %y + ret <2 x float> %mul +} + +; (0.0 - X) * Y +define float @neg_sink_nsz(float %x, float %y) { +; CHECK-LABEL: @neg_sink_nsz( +; CHECK-NEXT: [[SUB1:%.*]] = fneg nsz float [[X:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUB1]], [[Y:%.*]] +; CHECK-NEXT: ret float [[MUL]] +; + %sub1 = fsub nsz float 0.0, %x + %mul = fmul float %sub1, %y + ret float %mul +} + +define float @neg_sink_multi_use(float %x, float %y) { +; CHECK-LABEL: @neg_sink_multi_use( +; CHECK-NEXT: [[SUB1:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUB1]], [[Y:%.*]] +; CHECK-NEXT: [[MUL2:%.*]] = fmul float [[MUL]], [[SUB1]] +; CHECK-NEXT: ret float [[MUL2]] +; + %sub1 = fsub float -0.0, %x + %mul = fmul float %sub1, %y + %mul2 = fmul float %mul, %sub1 + ret float %mul2 +} + +define float @unary_neg_mul_multi_use(float %x, float %y) { +; CHECK-LABEL: @unary_neg_mul_multi_use( +; CHECK-NEXT: [[SUB1:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SUB1]], [[Y:%.*]] +; CHECK-NEXT: [[MUL2:%.*]] = fmul float [[MUL]], [[SUB1]] +; CHECK-NEXT: ret float [[MUL2]] +; + %sub1 = fneg float %x + %mul = fmul float %sub1, %y + %mul2 = fmul float %mul, %sub1 + ret float %mul2 +} + +; Don't crash when attempting to cast a constant FMul to an instruction. +define void @test8(i32* %inout) { +; CHECK-LABEL: @test8( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ , [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: br i1 undef, label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP0]] = insertelement <4 x float> [[LOCAL_VAR_7_0]], float 0.000000e+00, i32 2 +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* %inout, align 4 + %conv = uitofp i32 %0 to float + %vecinit = insertelement <4 x float> , float %conv, i32 3 + %sub = fsub <4 x float> , %vecinit + %1 = shufflevector <4 x float> %sub, <4 x float> poison, <4 x i32> + %mul = fmul <4 x float> zeroinitializer, %1 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %local_var_7.0 = phi <4 x float> [ %mul, %entry ], [ %2, %for.body ] + br i1 undef, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %2 = insertelement <4 x float> %local_var_7.0, float 0.000000e+00, i32 2 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; X * -1.0 => -0.0 - X +define float @test9(float %x) { +; CHECK-LABEL: @test9( +; CHECK-NEXT: [[MUL:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: ret float [[MUL]] +; + %mul = fmul float %x, -1.0 + ret float %mul +} + +; PR18532 +define <4 x float> @test10(<4 x float> %x) { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[MUL:%.*]] = fneg arcp afn <4 x float> [[X:%.*]] +; CHECK-NEXT: ret <4 x float> [[MUL]] +; + %mul = fmul arcp afn <4 x float> %x, + ret <4 x float> %mul +} + +define float @test11(float %x, float %y) { +; CHECK-LABEL: @test11( +; CHECK-NEXT: [[B:%.*]] = fadd fast float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = fadd fast float [[B]], 3.000000e+00 +; CHECK-NEXT: ret float [[C]] +; + %a = fadd fast float %x, 1.0 + %b = fadd fast float %y, 2.0 + %c = fadd fast float %a, %b + ret float %c +} + +declare double 
@llvm.sqrt.f64(double) + +; With unsafe/fast math, sqrt(X) * sqrt(X) is just X, +; but make sure another use of the sqrt is intact. +; Note that the remaining fmul is altered but is not 'fast' +; itself because it was not marked 'fast' originally. +; Thus, we have an overall fast result, but no more indication of +; 'fast'ness in the code. +define double @sqrt_squared2(double %f) { +; CHECK-LABEL: @sqrt_squared2( +; CHECK-NEXT: [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[F:%.*]]) +; CHECK-NEXT: [[MUL2:%.*]] = fmul double [[SQRT]], [[F]] +; CHECK-NEXT: ret double [[MUL2]] +; + %sqrt = call double @llvm.sqrt.f64(double %f) + %mul1 = fmul fast double %sqrt, %sqrt + %mul2 = fmul double %mul1, %sqrt + ret double %mul2 +} + +declare float @llvm.fabs.f32(float) nounwind readnone + +define float @fabs_squared(float %x) { +; CHECK-LABEL: @fabs_squared( +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X:%.*]], [[X]] +; CHECK-NEXT: ret float [[MUL]] +; + %x.fabs = call float @llvm.fabs.f32(float %x) + %mul = fmul float %x.fabs, %x.fabs + ret float %mul +} + +define float @fabs_squared_fast(float %x) { +; CHECK-LABEL: @fabs_squared_fast( +; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[X:%.*]], [[X]] +; CHECK-NEXT: ret float [[MUL]] +; + %x.fabs = call float @llvm.fabs.f32(float %x) + %mul = fmul fast float %x.fabs, %x.fabs + ret float %mul +} + +define float @fabs_fabs(float %x, float %y) { +; CHECK-LABEL: @fabs_fabs( +; CHECK-NEXT: [[TMP1:%.*]] = fmul float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = call float @llvm.fabs.f32(float [[TMP1]]) +; CHECK-NEXT: ret float [[MUL]] +; + %x.fabs = call float @llvm.fabs.f32(float %x) + %y.fabs = call float @llvm.fabs.f32(float %y) + %mul = fmul float %x.fabs, %y.fabs + ret float %mul +} + +define float @fabs_fabs_extra_use1(float %x, float %y) { +; CHECK-LABEL: @fabs_fabs_extra_use1( +; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: call void @use_f32(float [[X_FABS]]) +; CHECK-NEXT: [[TMP1:%.*]] = fmul ninf float [[X]], [[Y:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = call ninf float @llvm.fabs.f32(float [[TMP1]]) +; CHECK-NEXT: ret float [[MUL]] +; + %x.fabs = call float @llvm.fabs.f32(float %x) + call void @use_f32(float %x.fabs) + %y.fabs = call float @llvm.fabs.f32(float %y) + %mul = fmul ninf float %x.fabs, %y.fabs + ret float %mul +} + +define float @fabs_fabs_extra_use2(float %x, float %y) { +; CHECK-LABEL: @fabs_fabs_extra_use2( +; CHECK-NEXT: [[Y_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[Y:%.*]]) +; CHECK-NEXT: call void @use_f32(float [[Y_FABS]]) +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc ninf float [[X:%.*]], [[Y]] +; CHECK-NEXT: [[MUL:%.*]] = call reassoc ninf float @llvm.fabs.f32(float [[TMP1]]) +; CHECK-NEXT: ret float [[MUL]] +; + %x.fabs = call fast float @llvm.fabs.f32(float %x) + %y.fabs = call fast float @llvm.fabs.f32(float %y) + call void @use_f32(float %y.fabs) + %mul = fmul reassoc ninf float %x.fabs, %y.fabs + ret float %mul +} + +; negative test - don't create an extra instruction + +define float @fabs_fabs_extra_use3(float %x, float %y) { +; CHECK-LABEL: @fabs_fabs_extra_use3( +; CHECK-NEXT: [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: call void @use_f32(float [[X_FABS]]) +; CHECK-NEXT: [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]]) +; CHECK-NEXT: call void @use_f32(float [[Y_FABS]]) +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X_FABS]], [[Y_FABS]] +; CHECK-NEXT: ret float [[MUL]] +; + %x.fabs = call float @llvm.fabs.f32(float %x) + call void 
@use_f32(float %x.fabs) + %y.fabs = call float @llvm.fabs.f32(float %y) + call void @use_f32(float %y.fabs) + %mul = fmul float %x.fabs, %y.fabs + ret float %mul +} + +; (X*Y) * X => (X*X) * Y +; The transform only requires 'reassoc', but test other FMF in +; the commuted variants to make sure FMF propagates as expected. + +define float @reassoc_common_operand1(float %x, float %y) { +; CHECK-LABEL: @reassoc_common_operand1( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], [[X]] +; CHECK-NEXT: [[MUL2:%.*]] = fmul reassoc float [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: ret float [[MUL2]] +; + %mul1 = fmul float %x, %y + %mul2 = fmul reassoc float %mul1, %x + ret float %mul2 +} + +; (Y*X) * X => (X*X) * Y + +define float @reassoc_common_operand2(float %x, float %y) { +; CHECK-LABEL: @reassoc_common_operand2( +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], [[X]] +; CHECK-NEXT: [[MUL2:%.*]] = fmul fast float [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: ret float [[MUL2]] +; + %mul1 = fmul float %y, %x + %mul2 = fmul fast float %mul1, %x + ret float %mul2 +} + +; X * (X*Y) => (X*X) * Y + +define float @reassoc_common_operand3(float %x1, float %y) { +; CHECK-LABEL: @reassoc_common_operand3( +; CHECK-NEXT: [[X:%.*]] = fdiv float [[X1:%.*]], 3.000000e+00 +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nnan float [[X]], [[X]] +; CHECK-NEXT: [[MUL2:%.*]] = fmul reassoc nnan float [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: ret float [[MUL2]] +; + %x = fdiv float %x1, 3.0 ; thwart complexity-based canonicalization + %mul1 = fmul float %x, %y + %mul2 = fmul reassoc nnan float %x, %mul1 + ret float %mul2 +} + +; X * (Y*X) => (X*X) * Y + +define float @reassoc_common_operand4(float %x1, float %y) { +; CHECK-LABEL: @reassoc_common_operand4( +; CHECK-NEXT: [[X:%.*]] = fdiv float [[X1:%.*]], 3.000000e+00 +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc ninf float [[X]], [[X]] +; CHECK-NEXT: [[MUL2:%.*]] = fmul reassoc ninf float [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: ret float [[MUL2]] +; + %x = fdiv float %x1, 3.0 ; thwart complexity-based canonicalization + %mul1 = fmul float %y, %x + %mul2 = fmul reassoc ninf float %x, %mul1 + ret float %mul2 +} + +; No change if the first fmul has another use. 
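+; Reassociating to (X*X) * Y would not remove the original fmul, which still
+; has another use, so the transform would only add an instruction.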
+ +define float @reassoc_common_operand_multi_use(float %x, float %y) { +; CHECK-LABEL: @reassoc_common_operand_multi_use( +; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[MUL2:%.*]] = fmul fast float [[MUL1]], [[X]] +; CHECK-NEXT: call void @use_f32(float [[MUL1]]) +; CHECK-NEXT: ret float [[MUL2]] +; + %mul1 = fmul float %x, %y + %mul2 = fmul fast float %mul1, %x + call void @use_f32(float %mul1) + ret float %mul2 +} + +declare float @llvm.log2.f32(float) + +; log2(Y * 0.5) * X = log2(Y) * X - X + +define float @log2half(float %x, float %y) { +; CHECK-LABEL: @log2half( +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.log2.f32(float [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fsub fast float [[TMP2]], [[X]] +; CHECK-NEXT: ret float [[MUL]] +; + %halfy = fmul float %y, 0.5 + %log2 = call float @llvm.log2.f32(float %halfy) + %mul = fmul fast float %log2, %x + ret float %mul +} + +define float @log2half_commute(float %x1, float %y) { +; CHECK-LABEL: @log2half_commute( +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.log2.f32(float [[Y:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[X1:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = fsub fast float [[TMP2]], [[X1]] +; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[TMP3]], 0x3FC24924A0000000 +; CHECK-NEXT: ret float [[MUL]] +; + %x = fdiv float %x1, 7.0 ; thwart complexity-based canonicalization + %halfy = fmul float %y, 0.5 + %log2 = call float @llvm.log2.f32(float %halfy) + %mul = fmul fast float %x, %log2 + ret float %mul +} + +; C1/X * C2 => (C1*C2) / X + +define float @fdiv_constant_numerator_fmul(float %x) { +; CHECK-LABEL: @fdiv_constant_numerator_fmul( +; CHECK-NEXT: [[T3:%.*]] = fdiv reassoc float 1.200000e+07, [[X:%.*]] +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fdiv float 2.0e+3, %x + %t3 = fmul reassoc float %t1, 6.0e+3 + ret float %t3 +} + +; C1/X * C2 => (C1*C2) / X is disabled if C1/X has multiple uses + +@fmul2_external = external global float + +define float @fdiv_constant_numerator_fmul_extra_use(float %x) { +; CHECK-LABEL: @fdiv_constant_numerator_fmul_extra_use( +; CHECK-NEXT: [[DIV:%.*]] = fdiv fast float 1.000000e+00, [[X:%.*]] +; CHECK-NEXT: store float [[DIV]], float* @fmul2_external, align 4 +; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[DIV]], 2.000000e+00 +; CHECK-NEXT: ret float [[MUL]] +; + %div = fdiv fast float 1.0, %x + store float %div, float* @fmul2_external + %mul = fmul fast float %div, 2.0 + ret float %mul +} + +; X/C1 * C2 => X * (C2/C1) (if C2/C1 is normal FP) + +define float @fdiv_constant_denominator_fmul(float %x) { +; CHECK-LABEL: @fdiv_constant_denominator_fmul( +; CHECK-NEXT: [[T3:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00 +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fdiv float %x, 2.0e+3 + %t3 = fmul reassoc float %t1, 6.0e+3 + ret float %t3 +} + +define <4 x float> @fdiv_constant_denominator_fmul_vec(<4 x float> %x) { +; CHECK-LABEL: @fdiv_constant_denominator_fmul_vec( +; CHECK-NEXT: [[T3:%.*]] = fmul reassoc <4 x float> [[X:%.*]], +; CHECK-NEXT: ret <4 x float> [[T3]] +; + %t1 = fdiv <4 x float> %x, + %t3 = fmul reassoc <4 x float> %t1, + ret <4 x float> %t3 +} + +; Make sure fmul with constant expression doesn't assert. 
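+; The multiplier below is a vector constant expression rather than an
+; Instruction, so the fold must not assume its operands are instructions.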
+ +define <4 x float> @fdiv_constant_denominator_fmul_vec_constexpr(<4 x float> %x) { +; CHECK-LABEL: @fdiv_constant_denominator_fmul_vec_constexpr( +; CHECK-NEXT: [[T3:%.*]] = fmul reassoc <4 x float> [[X:%.*]], +; CHECK-NEXT: ret <4 x float> [[T3]] +; + %constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> to i160) to i128) to <4 x float> + %t1 = fdiv <4 x float> %x, + %t3 = fmul reassoc <4 x float> %t1, %constExprMul + ret <4 x float> %t3 +} + +; This shows that at least part of instcombine does not check constant +; values to see if it is creating denorms (0x3800000000000000 is a denorm +; for 32-bit float), so protecting against denorms in other parts is +; probably not doing the intended job. + +define float @fmul_constant_reassociation(float %x) { +; CHECK-LABEL: @fmul_constant_reassociation( +; CHECK-NEXT: [[R:%.*]] = fmul reassoc nsz float [[X:%.*]], 0x3800000000000000 +; CHECK-NEXT: ret float [[R]] +; + %mul_flt_min = fmul reassoc nsz float %x, 0x3810000000000000 + %r = fmul reassoc nsz float %mul_flt_min, 0.5 + ret float %r +} + +; Canonicalization "X/C1 * C2 => X * (C2/C1)" still applies if C2/C1 is denormal +; (otherwise, we should not have allowed the reassociation in the previous test). +; 0x3810000000000000 == FLT_MIN + +define float @fdiv_constant_denominator_fmul_denorm(float %x) { +; CHECK-LABEL: @fdiv_constant_denominator_fmul_denorm( +; CHECK-NEXT: [[T3:%.*]] = fmul fast float [[X:%.*]], 0x3760620000000000 +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fdiv float %x, 2.0e+3 + %t3 = fmul fast float %t1, 0x3810000000000000 + ret float %t3 +} + +; X / C1 * C2 => X / (C2/C1) if C1/C2 is abnormal, but C2/C1 is a normal value. +; TODO: We don't convert the fast fdiv to fmul because that would be multiplication +; by a denormal, but we could do better when we know that denormals are not a problem. + +define float @fdiv_constant_denominator_fmul_denorm_try_harder(float %x) { +; CHECK-LABEL: @fdiv_constant_denominator_fmul_denorm_try_harder( +; CHECK-NEXT: [[T3:%.*]] = fdiv reassoc float [[X:%.*]], 0x47E8000000000000 +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fdiv float %x, 3.0 + %t3 = fmul reassoc float %t1, 0x3810000000000000 + ret float %t3 +} + +; Negative test: we should not have 2 divisions instead of the 1 we started with. 
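+; The fdiv below has a second use (the fadd), so rewriting the fmul as another
+; fdiv would leave two divisions where the input had one.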
+ +define float @fdiv_constant_denominator_fmul_denorm_try_harder_extra_use(float %x) { +; CHECK-LABEL: @fdiv_constant_denominator_fmul_denorm_try_harder_extra_use( +; CHECK-NEXT: [[T1:%.*]] = fdiv float [[X:%.*]], 3.000000e+00 +; CHECK-NEXT: [[T3:%.*]] = fmul fast float [[T1]], 0x3810000000000000 +; CHECK-NEXT: [[R:%.*]] = fadd float [[T1]], [[T3]] +; CHECK-NEXT: ret float [[R]] +; + %t1 = fdiv float %x, 3.0e+0 + %t3 = fmul fast float %t1, 0x3810000000000000 + %r = fadd float %t1, %t3 + ret float %r +} + +; (X + C1) * C2 --> (X * C2) + C1*C2 + +define float @fmul_fadd_distribute(float %x) { +; CHECK-LABEL: @fmul_fadd_distribute( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00 +; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[TMP1]], 6.000000e+00 +; CHECK-NEXT: ret float [[T3]] +; + %t2 = fadd float %x, 2.0 + %t3 = fmul reassoc float %t2, 3.0 + ret float %t3 +} + +; (X - C1) * C2 --> (X * C2) - C1*C2 + +define float @fmul_fsub_distribute1(float %x) { +; CHECK-LABEL: @fmul_fsub_distribute1( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00 +; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[TMP1]], -6.000000e+00 +; CHECK-NEXT: ret float [[T3]] +; + %t2 = fsub float %x, 2.0 + %t3 = fmul reassoc float %t2, 3.0 + ret float %t3 +} + +; (C1 - X) * C2 --> C1*C2 - (X * C2) + +define float @fmul_fsub_distribute2(float %x) { +; CHECK-LABEL: @fmul_fsub_distribute2( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00 +; CHECK-NEXT: [[T3:%.*]] = fsub reassoc float 6.000000e+00, [[TMP1]] +; CHECK-NEXT: ret float [[T3]] +; + %t2 = fsub float 2.0, %x + %t3 = fmul reassoc float %t2, 3.0 + ret float %t3 +} + +; FIXME: This should only need 'reassoc'. +; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) + +define float @fmul_fadd_fmul_distribute(float %x) { +; CHECK-LABEL: @fmul_fadd_fmul_distribute( +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+01 +; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[TMP1]], 1.000000e+01 +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fmul float %x, 6.0 + %t2 = fadd float %t1, 2.0 + %t3 = fmul fast float %t2, 5.0 + ret float %t3 +} + +define float @fmul_fadd_distribute_extra_use(float %x) { +; CHECK-LABEL: @fmul_fadd_distribute_extra_use( +; CHECK-NEXT: [[T1:%.*]] = fmul float [[X:%.*]], 6.000000e+00 +; CHECK-NEXT: [[T2:%.*]] = fadd float [[T1]], 2.000000e+00 +; CHECK-NEXT: [[T3:%.*]] = fmul fast float [[T2]], 5.000000e+00 +; CHECK-NEXT: call void @use_f32(float [[T2]]) +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fmul float %x, 6.0 + %t2 = fadd float %t1, 2.0 + %t3 = fmul fast float %t2, 5.0 + call void @use_f32(float %t2) + ret float %t3 +} + +; (X/C1 + C2) * C3 => X/(C1/C3) + C2*C3 +; 0x10000000000000 = DBL_MIN +; TODO: We don't convert the fast fdiv to fmul because that would be multiplication +; by a denormal, but we could do better when we know that denormals are not a problem. 
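+; In the next test, C1/C3 = 3.0 / DBL_MIN = 0x7FE8000000000000 and
+; C2*C3 = 5.0 * DBL_MIN = 0x34000000000000 are both representable, so the
+; distribution is performed while the division is kept.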
+ +define double @fmul_fadd_fdiv_distribute2(double %x) { +; CHECK-LABEL: @fmul_fadd_fdiv_distribute2( +; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc double [[X:%.*]], 0x7FE8000000000000 +; CHECK-NEXT: [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000 +; CHECK-NEXT: ret double [[T3]] +; + %t1 = fdiv double %x, 3.0 + %t2 = fadd double %t1, 5.0 + %t3 = fmul reassoc double %t2, 0x10000000000000 + ret double %t3 +} + +; 5.0e-1 * DBL_MIN yields denormal, so "(f1*3.0 + 5.0e-1) * DBL_MIN" cannot +; be simplified into f1 * (3.0*DBL_MIN) + (5.0e-1*DBL_MIN) + +define double @fmul_fadd_fdiv_distribute3(double %x) { +; CHECK-LABEL: @fmul_fadd_fdiv_distribute3( +; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc double [[X:%.*]], 0x7FE8000000000000 +; CHECK-NEXT: [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000 +; CHECK-NEXT: ret double [[T3]] +; + %t1 = fdiv double %x, 3.0 + %t2 = fadd double %t1, 5.0 + %t3 = fmul reassoc double %t2, 0x10000000000000 + ret double %t3 +} + +; FIXME: This should only need 'reassoc'. +; (C2 - (X*C1)) * C3 => (C2*C3) - (X * (C1*C3)) + +define float @fmul_fsub_fmul_distribute(float %x) { +; CHECK-LABEL: @fmul_fsub_fmul_distribute( +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+01 +; CHECK-NEXT: [[T3:%.*]] = fsub fast float 1.000000e+01, [[TMP1]] +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fmul float %x, 6.0 + %t2 = fsub float 2.0, %t1 + %t3 = fmul fast float %t2, 5.0 + ret float %t3 +} + +define float @fmul_fsub_fmul_distribute_extra_use(float %x) { +; CHECK-LABEL: @fmul_fsub_fmul_distribute_extra_use( +; CHECK-NEXT: [[T1:%.*]] = fmul float [[X:%.*]], 6.000000e+00 +; CHECK-NEXT: [[T2:%.*]] = fsub float 2.000000e+00, [[T1]] +; CHECK-NEXT: [[T3:%.*]] = fmul fast float [[T2]], 5.000000e+00 +; CHECK-NEXT: call void @use_f32(float [[T2]]) +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fmul float %x, 6.0 + %t2 = fsub float 2.0, %t1 + %t3 = fmul fast float %t2, 5.0 + call void @use_f32(float %t2) + ret float %t3 +} + +; FIXME: This should only need 'reassoc'. 
+; ((X*C1) - C2) * C3 => (X * (C1*C3)) - C2*C3 + +define float @fmul_fsub_fmul_distribute2(float %x) { +; CHECK-LABEL: @fmul_fsub_fmul_distribute2( +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], 3.000000e+01 +; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[TMP1]], -1.000000e+01 +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fmul float %x, 6.0 + %t2 = fsub float %t1, 2.0 + %t3 = fmul fast float %t2, 5.0 + ret float %t3 +} + +define float @fmul_fsub_fmul_distribute2_extra_use(float %x) { +; CHECK-LABEL: @fmul_fsub_fmul_distribute2_extra_use( +; CHECK-NEXT: [[T1:%.*]] = fmul float [[X:%.*]], 6.000000e+00 +; CHECK-NEXT: [[T2:%.*]] = fsub float 2.000000e+00, [[T1]] +; CHECK-NEXT: [[T3:%.*]] = fmul fast float [[T2]], 5.000000e+00 +; CHECK-NEXT: call void @use_f32(float [[T2]]) +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fmul float %x, 6.0 + %t2 = fsub float 2.0, %t1 + %t3 = fmul fast float %t2, 5.0 + call void @use_f32(float %t2) + ret float %t3 +} + +; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses + +define float @common_factor(float %x, float %y) { +; CHECK-LABEL: @common_factor( +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[MUL1:%.*]] = fmul fast float [[MUL]], [[X]] +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[MUL1]], [[MUL]] +; CHECK-NEXT: ret float [[ADD]] +; + %mul = fmul float %x, %y + %mul1 = fmul fast float %mul, %x + %add = fadd float %mul1, %mul + ret float %add +} + +define double @fmul_fdiv_factor_squared(double %x, double %y) { +; CHECK-LABEL: @fmul_fdiv_factor_squared( +; CHECK-NEXT: [[DIV:%.*]] = fdiv fast double [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[SQUARED:%.*]] = fmul fast double [[DIV]], [[DIV]] +; CHECK-NEXT: ret double [[SQUARED]] +; + %div = fdiv fast double %x, %y + %squared = fmul fast double %div, %div + ret double %squared +} + +define double @fmul_fdivs_factor_common_denominator(double %x, double %y, double %z) { +; CHECK-LABEL: @fmul_fdivs_factor_common_denominator( +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[Z:%.*]], [[Z]] +; CHECK-NEXT: [[MUL:%.*]] = fdiv fast double [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret double [[MUL]] +; + %div1 = fdiv double %x, %z + %div2 = fdiv double %y, %z + %mul = fmul fast double %div1, %div2 + ret double %mul +} + +define double @fmul_fdivs_factor(double %x, double %y, double %z, double %w) { +; CHECK-LABEL: @fmul_fdivs_factor( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc double [[Z:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fdiv reassoc double [[TMP1]], [[W:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc double [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: ret double [[MUL]] +; + %div1 = fdiv double %x, %y + %div2 = fdiv double %z, %w + %mul = fmul reassoc double %div1, %div2 + ret double %mul +} + +define double @fmul_fdiv_factor(double %x, double %y, double %z) { +; CHECK-LABEL: @fmul_fdiv_factor( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc double [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc double [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: ret double [[MUL]] +; + %div = fdiv double %x, %y + %mul = fmul reassoc double %div, %z + ret double %mul +} + +define double @fmul_fdiv_factor_constant1(double %x, double %y) { +; CHECK-LABEL: @fmul_fdiv_factor_constant1( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc double [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: ret double [[MUL]] +; + %div = fdiv double %x, %y + %mul = fmul reassoc double %div, 42.0 + ret double 
%mul +} + +define <2 x float> @fmul_fdiv_factor_constant2(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @fmul_fdiv_factor_constant2( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <2 x float> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc <2 x float> [[TMP1]], +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %div = fdiv <2 x float> %x, + %mul = fmul reassoc <2 x float> %div, %y + ret <2 x float> %mul +} + +define float @fmul_fdiv_factor_extra_use(float %x, float %y) { +; CHECK-LABEL: @fmul_fdiv_factor_extra_use( +; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: call void @use_f32(float [[DIV]]) +; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc float [[DIV]], [[Y:%.*]] +; CHECK-NEXT: ret float [[MUL]] +; + %div = fdiv float %x, 42.0 + call void @use_f32(float %div) + %mul = fmul reassoc float %div, %y + ret float %mul +} + +; Avoid infinite looping by moving negation out of a constant expression. + +@g = external global {[2 x i8*]}, align 1 + +define double @fmul_negated_constant_expression(double %x) { +; CHECK-LABEL: @fmul_negated_constant_expression( +; CHECK-NEXT: [[R:%.*]] = fmul double [[X:%.*]], fsub (double -0.000000e+00, double bitcast (i64 ptrtoint (i8** getelementptr inbounds ({ [2 x i8*] }, { [2 x i8*] }* @g, i64 0, inrange i32 0, i64 2) to i64) to double)) +; CHECK-NEXT: ret double [[R]] +; + %r = fmul double %x, fsub (double -0.000000e+00, double bitcast (i64 ptrtoint (i8** getelementptr inbounds ({ [2 x i8*] }, { [2 x i8*] }* @g, i64 0, inrange i32 0, i64 2) to i64) to double)) + ret double %r +} + +define float @negate_if_true(float %x, i1 %cond) { +; CHECK-LABEL: @negate_if_true( +; CHECK-NEXT: [[TMP1:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND:%.*]], float [[TMP1]], float [[X]] +; CHECK-NEXT: ret float [[TMP2]] +; + %sel = select i1 %cond, float -1.0, float 1.0 + %r = fmul float %sel, %x + ret float %r +} + +define float @negate_if_false(float %x, i1 %cond) { +; CHECK-LABEL: @negate_if_false( +; CHECK-NEXT: [[TMP1:%.*]] = fneg arcp float [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select arcp i1 [[COND:%.*]], float [[X]], float [[TMP1]] +; CHECK-NEXT: ret float [[TMP2]] +; + %sel = select i1 %cond, float 1.0, float -1.0 + %r = fmul arcp float %sel, %x + ret float %r +} + +define <2 x double> @negate_if_true_commute(<2 x double> %px, i1 %cond) { +; CHECK-LABEL: @negate_if_true_commute( +; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> , [[PX:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fneg ninf <2 x double> [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = select ninf i1 [[COND:%.*]], <2 x double> [[TMP1]], <2 x double> [[X]] +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %x = fdiv <2 x double> , %px ; thwart complexity-based canonicalization + %sel = select i1 %cond, <2 x double> , <2 x double> + %r = fmul ninf <2 x double> %x, %sel + ret <2 x double> %r +} + +define <2 x double> @negate_if_false_commute(<2 x double> %px, <2 x i1> %cond) { +; CHECK-LABEL: @negate_if_false_commute( +; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> , [[PX:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x double> [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[COND:%.*]], <2 x double> [[X]], <2 x double> [[TMP1]] +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; + %x = fdiv <2 x double> , %px ; thwart complexity-based canonicalization + %sel = select <2 x i1> %cond, <2 x double> , <2 x double> + %r = fmul <2 x double> %x, %sel + ret <2 x double> %r +} + +; Negative test + +define float @negate_if_true_extra_use(float %x, i1 %cond) { +; CHECK-LABEL: 
@negate_if_true_extra_use( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], float -1.000000e+00, float 1.000000e+00 +; CHECK-NEXT: call void @use_f32(float [[SEL]]) +; CHECK-NEXT: [[R:%.*]] = fmul float [[SEL]], [[X:%.*]] +; CHECK-NEXT: ret float [[R]] +; + %sel = select i1 %cond, float -1.0, float 1.0 + call void @use_f32(float %sel) + %r = fmul float %sel, %x + ret float %r +} + +; Negative test + +define <2 x double> @negate_if_true_wrong_constant(<2 x double> %px, i1 %cond) { +; CHECK-LABEL: @negate_if_true_wrong_constant( +; CHECK-NEXT: [[X:%.*]] = fdiv <2 x double> , [[PX:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x double> , <2 x double> +; CHECK-NEXT: [[R:%.*]] = fmul <2 x double> [[X]], [[SEL]] +; CHECK-NEXT: ret <2 x double> [[R]] +; + %x = fdiv <2 x double> , %px ; thwart complexity-based canonicalization + %sel = select i1 %cond, <2 x double> , <2 x double> + %r = fmul <2 x double> %x, %sel + ret <2 x double> %r +} + +; X *fast (C ? 1.0 : 0.0) -> C ? X : 0.0 +define float @fmul_select(float %x, i1 %c) { +; CHECK-LABEL: @fmul_select( +; CHECK-NEXT: [[MUL:%.*]] = select fast i1 [[C:%.*]], float [[X:%.*]], float 0.000000e+00 +; CHECK-NEXT: ret float [[MUL]] +; + %sel = select i1 %c, float 1.0, float 0.0 + %mul = fmul fast float %sel, %x + ret float %mul +} + +; X *fast (C ? 1.0 : 0.0) -> C ? X : 0.0 +define <2 x float> @fmul_select_vec(<2 x float> %x, i1 %c) { +; CHECK-LABEL: @fmul_select_vec( +; CHECK-NEXT: [[MUL:%.*]] = select fast i1 [[C:%.*]], <2 x float> [[X:%.*]], <2 x float> zeroinitializer +; CHECK-NEXT: ret <2 x float> [[MUL]] +; + %sel = select i1 %c, <2 x float> , <2 x float> zeroinitializer + %mul = fmul fast <2 x float> %sel, %x + ret <2 x float> %mul +} + +; Without fast math flags we can't optimize X * (C ? 1.0 : 0.0) -> C ? X : 0.0 +define float @fmul_select_strict(float %x, i1 %c) { +; CHECK-LABEL: @fmul_select_strict( +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], float 1.000000e+00, float 0.000000e+00 +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[SEL]], [[X:%.*]] +; CHECK-NEXT: ret float [[MUL]] +; + %sel = select i1 %c, float 1.0, float 0.0 + %mul = fmul float %sel, %x + ret float %mul +} + +; sqrt(X) *fast (C ? sqrt(X) : 1.0) -> C ? 
X : sqrt(X)
+define double @fmul_sqrt_select(double %x, i1 %c) {
+; CHECK-LABEL: @fmul_sqrt_select(
+; CHECK-NEXT:    [[SQR:%.*]] = call double @llvm.sqrt.f64(double [[X:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = select fast i1 [[C:%.*]], double [[X]], double [[SQR]]
+; CHECK-NEXT:    ret double [[MUL]]
+;
+  %sqr = call double @llvm.sqrt.f64(double %x)
+  %sel = select i1 %c, double %sqr, double 1.0
+  %mul = fmul fast double %sqr, %sel
+  ret double %mul
+}
+
+; fastmath => z * splat(0) = splat(0), even for scalable vectors
+define <vscale x 2 x float> @mul_scalable_splat_zero(<vscale x 2 x float> %z) {
+; CHECK-LABEL: @mul_scalable_splat_zero(
+; CHECK-NEXT:    ret <vscale x 2 x float> zeroinitializer
+;
+  %shuf = shufflevector <vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 0.0, i32 0), <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %t3 = fmul fast <vscale x 2 x float> %shuf, %z
+  ret <vscale x 2 x float> %t3
+}
diff --git a/llvm/test/Transforms/InstCombine/icmp-bc-vec-inseltpoison.ll b/llvm/test/Transforms/InstCombine/icmp-bc-vec-inseltpoison.ll
index 5df1109..b7e61eb 100644
--- a/llvm/test/Transforms/InstCombine/icmp-bc-vec-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-bc-vec-inseltpoison.ll
@@ -18,7 +18,7 @@ define i1 @test_i1_0(i1 %val) {
 ; CHECK-NEXT:    ret i1 [[COND]]
 ;
   %insvec = insertelement <4 x i1> poison, i1 %val, i32 0
-  %vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> zeroinitializer
+  %vec = shufflevector <4 x i1> %insvec, <4 x i1> poison, <4 x i32> zeroinitializer
   %cast = bitcast <4 x i1> %vec to i4
   %cond = icmp eq i4 %cast, 0
   ret i1 %cond
@@ -30,7 +30,7 @@ define i1 @test_i1_0_2(i1 %val) {
 ; CHECK-NEXT:    ret i1 [[COND]]
 ;
   %insvec = insertelement <4 x i1> poison, i1 %val, i32 2
-  %vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32>
+  %vec = shufflevector <4 x i1> %insvec, <4 x i1> poison, <4 x i32>
   %cast = bitcast <4 x i1> %vec to i4
   %cond = icmp eq i4 %cast, 0
   ret i1 %cond
@@ -41,7 +41,7 @@ define i1 @test_i1_m1(i1 %val) {
 ; CHECK-NEXT:    ret i1 [[VAL:%.*]]
 ;
   %insvec = insertelement <4 x i1> poison, i1 %val, i32 0
-  %vec = shufflevector <4 x i1> %insvec, <4 x i1> undef, <4 x i32> zeroinitializer
+  %vec = shufflevector <4 x i1> %insvec, <4 x i1> poison, <4 x i32> zeroinitializer
   %cast = bitcast <4 x i1> %vec to i4
   %cond = icmp eq i4 %cast, -1
   ret i1 %cond
@@ -53,7 +53,7 @@ define i1 @test_i8_pattern(i8 %val) {
 ; CHECK-NEXT:    ret i1 [[COND]]
 ;
   %insvec = insertelement <4 x i8> poison, i8 %val, i32 0
-  %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer
+  %vec = shufflevector <4 x i8> %insvec, <4 x i8> poison, <4 x i32> zeroinitializer
   %cast = bitcast <4 x i8> %vec to i32
   %cond = icmp eq i32 %cast, 1212696648
   ret i1 %cond
@@ -65,7 +65,7 @@ define i1 @test_i8_pattern_2(i8 %val) {
 ; CHECK-NEXT:    ret i1 [[COND]]
 ;
   %insvec = insertelement <4 x i8> poison, i8 %val, i32 2
-  %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32>
+  %vec = shufflevector <4 x i8> %insvec, <4 x i8> poison, <4 x i32>
   %cast = bitcast <4 x i8> %vec to i32
   %cond = icmp eq i32 %cast, 1212696648
   ret i1 %cond
@@ -74,12 +74,12 @@ define i1 @test_i8_pattern_2(i8 %val) {
 ; Make sure we don't try to fold if the shufflemask has differing element values
 define i1 @test_i8_pattern_3(<4 x i8> %invec) {
 ; CHECK-LABEL: @test_i8_pattern_3(
-; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <4 x i8> [[INVEC:%.*]], <4 x i8> undef, <4 x i32>
+; CHECK-NEXT:    [[VEC:%.*]] = shufflevector <4 x i8> [[INVEC:%.*]], <4 x i8> poison, <4 x i32>
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast <4 x i8> [[VEC]] to i32
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[CAST]], 1212696648
 ; CHECK-NEXT:    ret i1 [[COND]]
 ;
-  %vec = shufflevector <4 x i8>
%invec, <4 x i8> undef, <4 x i32> + %vec = shufflevector <4 x i8> %invec, <4 x i8> poison, <4 x i32> %cast = bitcast <4 x i8> %vec to i32 %cond = icmp eq i32 %cast, 1212696648 ret i1 %cond @@ -89,13 +89,13 @@ define i1 @test_i8_pattern_3(<4 x i8> %invec) { define i1 @test_i8_nopattern(i8 %val) { ; CHECK-LABEL: @test_i8_nopattern( ; CHECK-NEXT: [[INSVEC:%.*]] = insertelement <4 x i8> poison, i8 [[VAL:%.*]], i32 0 -; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i8> [[INSVEC]], <4 x i8> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i8> [[INSVEC]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[VEC]] to i32 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[CAST]], 1212696647 ; CHECK-NEXT: ret i1 [[COND]] ; %insvec = insertelement <4 x i8> poison, i8 %val, i32 0 - %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer + %vec = shufflevector <4 x i8> %insvec, <4 x i8> poison, <4 x i32> zeroinitializer %cast = bitcast <4 x i8> %vec to i32 %cond = icmp eq i32 %cast, 1212696647 ret i1 %cond @@ -108,7 +108,7 @@ define i1 @test_i8_ult_pattern(i8 %val) { ; CHECK-NEXT: ret i1 [[COND]] ; %insvec = insertelement <4 x i8> poison, i8 %val, i32 0 - %vec = shufflevector <4 x i8> %insvec, <4 x i8> undef, <4 x i32> zeroinitializer + %vec = shufflevector <4 x i8> %insvec, <4 x i8> poison, <4 x i32> zeroinitializer %cast = bitcast <4 x i8> %vec to i32 %cond = icmp ult i32 %cast, 1212696648 ret i1 %cond @@ -120,7 +120,7 @@ define i1 @extending_shuffle_with_weird_types(<2 x i9> %v) { ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i9 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; - %splat = shufflevector <2 x i9> %v, <2 x i9> undef, <3 x i32> zeroinitializer + %splat = shufflevector <2 x i9> %v, <2 x i9> poison, <3 x i32> zeroinitializer %cast = bitcast <3 x i9> %splat to i27 %cmp = icmp slt i27 %cast, 262657 ; 0x040201 ret i1 %cmp diff --git a/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll b/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll new file mode 100644 index 0000000..6fa9cfd --- /dev/null +++ b/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll @@ -0,0 +1,375 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +; Canonicalize vector ge/le comparisons with constants to gt/lt. + +; Normal types are ConstantDataVectors. Test the constant values adjacent to the +; min/max values that we're not allowed to transform. 
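+; Illustrative sketch of the rewrite (C here is a hypothetical constant, not one of
+; the test values):
+;   %cmp = icmp sge <2 x i8> %x, <i8 C, i8 C>
+; becomes
+;   %cmp = icmp sgt <2 x i8> %x, <i8 C-1, i8 C-1>
+; since x >= C is equivalent to x > C-1 whenever C is not the signed minimum.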
+
+define <2 x i1> @sge(<2 x i8> %x) {
+; CHECK-LABEL: @sge(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]],
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %cmp = icmp sge <2 x i8> %x,
+  ret <2 x i1> %cmp
+}
+
+define <2 x i1> @uge(<2 x i8> %x) {
+; CHECK-LABEL: @uge(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i8> [[X:%.*]],
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %cmp = icmp uge <2 x i8> %x,
+  ret <2 x i1> %cmp
+}
+
+define <2 x i1> @sle(<2 x i8> %x) {
+; CHECK-LABEL: @sle(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]],
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %cmp = icmp sle <2 x i8> %x,
+  ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ule(<2 x i8> %x) {
+; CHECK-LABEL: @ule(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i8> [[X:%.*]],
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %cmp = icmp ule <2 x i8> %x,
+  ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ult_min_signed_value(<2 x i8> %x) {
+; CHECK-LABEL: @ult_min_signed_value(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]],
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %cmp = icmp ult <2 x i8> %x,
+  ret <2 x i1> %cmp
+}
+
+; Zeros are special: they're ConstantAggregateZero.
+
+define <2 x i1> @sge_zero(<2 x i8> %x) {
+; CHECK-LABEL: @sge_zero(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %cmp = icmp sge <2 x i8> %x, <i8 0, i8 0>
+  ret <2 x i1> %cmp
+}
+
+define <2 x i1> @uge_zero(<2 x i8> %x) {
+; CHECK-LABEL: @uge_zero(
+; CHECK-NEXT:    ret <2 x i1> <i1 true, i1 true>
+;
+  %cmp = icmp uge <2 x i8> %x, <i8 0, i8 0>
+  ret <2 x i1> %cmp
+}
+
+define <2 x i1> @sle_zero(<2 x i8> %x) {
+; CHECK-LABEL: @sle_zero(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %cmp = icmp sle <2 x i8> %x, <i8 0, i8 0>
+  ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ule_zero(<2 x i8> %x) {
+; CHECK-LABEL: @ule_zero(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %cmp = icmp ule <2 x i8> %x, <i8 0, i8 0>
+  ret <2 x i1> %cmp
+}
+
+; Weird types are ConstantVectors, not ConstantDataVectors. For an i3 type:
+; Signed min = -4
+; Unsigned min = 0
+; Signed max = 3
+; Unsigned max = 7
+
+define <3 x i1> @sge_weird(<3 x i3> %x) {
+; CHECK-LABEL: @sge_weird(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <3 x i3> [[X:%.*]],
+; CHECK-NEXT:    ret <3 x i1> [[CMP]]
+;
+  %cmp = icmp sge <3 x i3> %x,
+  ret <3 x i1> %cmp
+}
+
+define <3 x i1> @uge_weird(<3 x i3> %x) {
+; CHECK-LABEL: @uge_weird(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <3 x i3> [[X:%.*]],
+; CHECK-NEXT:    ret <3 x i1> [[CMP]]
+;
+  %cmp = icmp uge <3 x i3> %x,
+  ret <3 x i1> %cmp
+}
+
+define <3 x i1> @sle_weird(<3 x i3> %x) {
+; CHECK-LABEL: @sle_weird(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <3 x i3> [[X:%.*]],
+; CHECK-NEXT:    ret <3 x i1> [[CMP]]
+;
+  %cmp = icmp sle <3 x i3> %x,
+  ret <3 x i1> %cmp
+}
+
+define <3 x i1> @ule_weird(<3 x i3> %x) {
+; CHECK-LABEL: @ule_weird(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <3 x i3> [[X:%.*]],
+; CHECK-NEXT:    ret <3 x i1> [[CMP]]
+;
+  %cmp = icmp ule <3 x i3> %x,
+  ret <3 x i1> %cmp
+}
+
+; We can't do the transform if any constants are already at the limits.
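+; Illustrative limit case, using the i3 ranges listed above: 'icmp sge %x, -4' would
+; have to become 'icmp sgt %x, -5', but -5 is not representable in i3, so comparisons
+; against the min/max constants below must be left alone.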
+ +define <2 x i1> @sge_min(<2 x i3> %x) { +; CHECK-LABEL: @sge_min( +; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i3> [[X:%.*]], +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %cmp = icmp sge <2 x i3> %x, + ret <2 x i1> %cmp +} + +define <2 x i1> @uge_min(<2 x i3> %x) { +; CHECK-LABEL: @uge_min( +; CHECK-NEXT: [[CMP:%.*]] = icmp uge <2 x i3> [[X:%.*]], +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %cmp = icmp uge <2 x i3> %x, + ret <2 x i1> %cmp +} + +define <2 x i1> @sle_max(<2 x i3> %x) { +; CHECK-LABEL: @sle_max( +; CHECK-NEXT: [[CMP:%.*]] = icmp sle <2 x i3> [[X:%.*]], +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %cmp = icmp sle <2 x i3> %x, + ret <2 x i1> %cmp +} + +define <2 x i1> @ule_max(<2 x i3> %x) { +; CHECK-LABEL: @ule_max( +; CHECK-NEXT: [[CMP:%.*]] = icmp ule <2 x i3> [[X:%.*]], +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %cmp = icmp ule <2 x i3> %x, + ret <2 x i1> %cmp +} + +define <2 x i1> @PR27756_1(<2 x i8> %a) { +; CHECK-LABEL: @PR27756_1( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[A:%.*]], +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %cmp = icmp sle <2 x i8> %a, to i8), i8 0> + ret <2 x i1> %cmp +} + +; Undef elements don't prevent the transform of the comparison. + +define <3 x i1> @PR27756_2(<3 x i8> %a) { +; CHECK-LABEL: @PR27756_2( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt <3 x i8> [[A:%.*]], +; CHECK-NEXT: ret <3 x i1> [[CMP]] +; + %cmp = icmp sle <3 x i8> %a, + ret <3 x i1> %cmp +} + +define <3 x i1> @PR27756_3(<3 x i8> %a) { +; CHECK-LABEL: @PR27756_3( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i8> [[A:%.*]], +; CHECK-NEXT: ret <3 x i1> [[CMP]] +; + %cmp = icmp sge <3 x i8> %a, + ret <3 x i1> %cmp +} + +@someglobal = global i32 0 + +define <2 x i1> @PR27786(<2 x i8> %a) { +; CHECK-LABEL: @PR27786( +; CHECK-NEXT: [[CMP:%.*]] = icmp sle <2 x i8> [[A:%.*]], bitcast (i16 ptrtoint (i32* @someglobal to i16) to <2 x i8>) +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %cmp = icmp sle <2 x i8> %a, bitcast (i16 ptrtoint (i32* @someglobal to i16) to <2 x i8>) + ret <2 x i1> %cmp +} + +; This is similar to a transform for shuffled binops: compare first, shuffle after. + +define <4 x i1> @same_shuffle_inputs_icmp(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @same_shuffle_inputs_icmp( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i1> [[CMP]] +; + %shufx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> < i32 3, i32 3, i32 2, i32 0 > + %shufy = shufflevector <4 x i8> %y, <4 x i8> poison, <4 x i32> < i32 3, i32 3, i32 2, i32 0 > + %cmp = icmp sgt <4 x i8> %shufx, %shufy + ret <4 x i1> %cmp +} + +; fcmp and size-changing shuffles are ok too. 
+ +define <5 x i1> @same_shuffle_inputs_fcmp(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: @same_shuffle_inputs_fcmp( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <4 x float> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <5 x i32> +; CHECK-NEXT: ret <5 x i1> [[CMP]] +; + %shufx = shufflevector <4 x float> %x, <4 x float> poison, <5 x i32> < i32 0, i32 1, i32 3, i32 2, i32 0 > + %shufy = shufflevector <4 x float> %y, <4 x float> poison, <5 x i32> < i32 0, i32 1, i32 3, i32 2, i32 0 > + %cmp = fcmp oeq <5 x float> %shufx, %shufy + ret <5 x i1> %cmp +} + +declare void @use_v4i8(<4 x i8>) + +define <4 x i1> @same_shuffle_inputs_icmp_extra_use1(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @same_shuffle_inputs_icmp_extra_use1( +; CHECK-NEXT: [[SHUFX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i8> [[X]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> +; CHECK-NEXT: call void @use_v4i8(<4 x i8> [[SHUFX]]) +; CHECK-NEXT: ret <4 x i1> [[CMP]] +; + %shufx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > + %shufy = shufflevector <4 x i8> %y, <4 x i8> poison, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > + %cmp = icmp ugt <4 x i8> %shufx, %shufy + call void @use_v4i8(<4 x i8> %shufx) + ret <4 x i1> %cmp +} + +declare void @use_v2i8(<2 x i8>) + +define <2 x i1> @same_shuffle_inputs_icmp_extra_use2(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @same_shuffle_inputs_icmp_extra_use2( +; CHECK-NEXT: [[SHUFY:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[X:%.*]], [[Y]] +; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <2 x i32> +; CHECK-NEXT: call void @use_v2i8(<2 x i8> [[SHUFY]]) +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %shufx = shufflevector <4 x i8> %x, <4 x i8> poison, <2 x i32> < i32 3, i32 2 > + %shufy = shufflevector <4 x i8> %y, <4 x i8> poison, <2 x i32> < i32 3, i32 2 > + %cmp = icmp eq <2 x i8> %shufx, %shufy + call void @use_v2i8(<2 x i8> %shufy) + ret <2 x i1> %cmp +} + +; Negative test: if both shuffles have extra uses, don't transform because that would increase instruction count. 
+ +define <2 x i1> @same_shuffle_inputs_icmp_extra_use3(<4 x i8> %x, <4 x i8> %y) { +; CHECK-LABEL: @same_shuffle_inputs_icmp_extra_use3( +; CHECK-NEXT: [[SHUFX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[SHUFY:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i8> [[SHUFX]], [[SHUFY]] +; CHECK-NEXT: call void @use_v2i8(<2 x i8> [[SHUFX]]) +; CHECK-NEXT: call void @use_v2i8(<2 x i8> [[SHUFY]]) +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %shufx = shufflevector <4 x i8> %x, <4 x i8> poison, <2 x i32> < i32 0, i32 0 > + %shufy = shufflevector <4 x i8> %y, <4 x i8> poison, <2 x i32> < i32 0, i32 0 > + %cmp = icmp eq <2 x i8> %shufx, %shufy + call void @use_v2i8(<2 x i8> %shufx) + call void @use_v2i8(<2 x i8> %shufy) + ret <2 x i1> %cmp +} + +define <4 x i1> @splat_icmp(<4 x i8> %x) { +; CHECK-LABEL: @splat_icmp( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i1> [[CMP]] +; + %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> + %cmp = icmp sgt <4 x i8> %splatx, + ret <4 x i1> %cmp +} + +define <4 x i1> @splat_icmp_undef(<4 x i8> %x) { +; CHECK-LABEL: @splat_icmp_undef( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i1> [[CMP]] +; + %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> + %cmp = icmp ult <4 x i8> %splatx, + ret <4 x i1> %cmp +} + +define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) { +; CHECK-LABEL: @splat_icmp_larger_size( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i1> [[CMP]] +; + %splatx = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> + %cmp = icmp eq <4 x i8> %splatx, + ret <4 x i1> %cmp +} + +define <4 x i1> @splat_fcmp_smaller_size(<5 x float> %x) { +; CHECK-LABEL: @splat_fcmp_smaller_size( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = shufflevector <5 x i1> [[TMP1]], <5 x i1> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i1> [[CMP]] +; + %splatx = shufflevector <5 x float> %x, <5 x float> poison, <4 x i32> + %cmp = fcmp oeq <4 x float> %splatx, + ret <4 x i1> %cmp +} + +; Negative test + +define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) { +; CHECK-LABEL: @splat_icmp_extra_use( +; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: call void @use_v4i8(<4 x i8> [[SPLATX]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], +; CHECK-NEXT: ret <4 x i1> [[CMP]] +; + %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> + call void @use_v4i8(<4 x i8> %splatx) + %cmp = icmp sgt <4 x i8> %splatx, + ret <4 x i1> %cmp +} + +; Negative test + +define <4 x i1> @not_splat_icmp(<4 x i8> %x) { +; CHECK-LABEL: @not_splat_icmp( +; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], +; CHECK-NEXT: ret <4 x i1> [[CMP]] +; + %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> + %cmp = icmp sgt <4 x i8> %splatx, + ret <4 x i1> %cmp +} + +; Negative test + +define <4 x i1> @not_splat_icmp2(<4 x i8> %x) { +; CHECK-LABEL: @not_splat_icmp2( +; 
CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], +; CHECK-NEXT: ret <4 x i1> [[CMP]] +; + %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> + %cmp = icmp sgt <4 x i8> %splatx, + ret <4 x i1> %cmp +} diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll index 64958b9..5e102c4 100644 --- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll @@ -254,7 +254,7 @@ bb1: br label %bb2 bb2: - %widen = shufflevector <2 x float> %x, <2 x float> undef, <4 x i32> + %widen = shufflevector <2 x float> %x, <2 x float> poison, <4 x i32> %ext2 = extractelement <4 x float> %widen, i32 0 %ins1 = insertelement <4 x float> , float %ext2, i32 2 %ins2 = insertelement <4 x float> %ins1, float %ext1, i32 3 @@ -432,7 +432,7 @@ define <4 x float> @insert_nonzero_index_splat(float %x) { ; CHECK-NEXT: ret <4 x float> [[SPLAT]] ; %xv = insertelement <4 x float> poison, float %x, i32 2 - %splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> + %splat = shufflevector <4 x float> %xv, <4 x float> poison, <4 x i32> ret <4 x float> %splat } @@ -443,7 +443,7 @@ define <3 x double> @insert_nonzero_index_splat_narrow(double %x) { ; CHECK-NEXT: ret <3 x double> [[SPLAT]] ; %xv = insertelement <4 x double> poison, double %x, i32 3 - %splat = shufflevector <4 x double> %xv, <4 x double> undef, <3 x i32> + %splat = shufflevector <4 x double> %xv, <4 x double> poison, <3 x i32> ret <3 x double> %splat } @@ -454,7 +454,7 @@ define <5 x i7> @insert_nonzero_index_splat_widen(i7 %x) { ; CHECK-NEXT: ret <5 x i7> [[SPLAT]] ; %xv = insertelement <4 x i7> poison, i7 %x, i32 1 - %splat = shufflevector <4 x i7> %xv, <4 x i7> undef, <5 x i32> + %splat = shufflevector <4 x i7> %xv, <4 x i7> poison, <5 x i32> ret <5 x i7> %splat } @@ -464,12 +464,12 @@ define <4 x float> @insert_nonzero_index_splat_extra_use(float %x) { ; CHECK-LABEL: @insert_nonzero_index_splat_extra_use( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 2 ; CHECK-NEXT: call void @use(<4 x float> [[XV]]) -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SPLAT]] ; %xv = insertelement <4 x float> poison, float %x, i32 2 call void @use(<4 x float> %xv) - %splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> + %splat = shufflevector <4 x float> %xv, <4 x float> poison, <4 x i32> ret <4 x float> %splat } @@ -478,11 +478,11 @@ define <4 x float> @insert_nonzero_index_splat_extra_use(float %x) { define <4 x float> @insert_nonzero_index_splat_wrong_base(float %x, <4 x float> %y) { ; CHECK-LABEL: @insert_nonzero_index_splat_wrong_base( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 2 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SPLAT]] ; %xv = insertelement <4 x float> %y, float %x, i32 2 - %splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> + %splat = shufflevector <4 x float> %xv, <4 x float> poison, <4 x i32> ret <4 x 
float> %splat } @@ -491,11 +491,11 @@ define <4 x float> @insert_nonzero_index_splat_wrong_base(float %x, <4 x float> define <4 x float> @insert_nonzero_index_splat_wrong_index(float %x, i32 %index) { ; CHECK-LABEL: @insert_nonzero_index_splat_wrong_index( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 [[INDEX:%.*]] -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SPLAT]] ; %xv = insertelement <4 x float> poison, float %x, i32 %index - %splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> + %splat = shufflevector <4 x float> %xv, <4 x float> poison, <4 x i32> ret <4 x float> %splat } @@ -506,7 +506,7 @@ define <4 x float> @insert_in_splat(float %x) { ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> poison, float %x, i32 0 - %splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> + %splat = shufflevector <4 x float> %xv, <4 x float> poison, <4 x i32> %r = insertelement <4 x float> %splat, float %x, i32 3 ret <4 x float> %r } @@ -515,14 +515,14 @@ define <4 x float> @insert_in_splat_extra_uses(float %x) { ; CHECK-LABEL: @insert_in_splat_extra_uses( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 0 ; CHECK-NEXT: call void @use(<4 x float> [[XV]]) -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: call void @use(<4 x float> [[SPLAT]]) ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> poison, float %x, i32 0 call void @use(<4 x float> %xv) - %splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> + %splat = shufflevector <4 x float> %xv, <4 x float> poison, <4 x i32> call void @use(<4 x float> %splat) %r = insertelement <4 x float> %splat, float %x, i32 3 ret <4 x float> %r @@ -533,12 +533,12 @@ define <4 x float> @insert_in_splat_extra_uses(float %x) { define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) { ; CHECK-LABEL: @insert_in_splat_variable_index( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i32 [[Y:%.*]] ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> poison, float %x, i32 0 - %splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> + %splat = shufflevector <4 x float> %xv, <4 x float> poison, <4 x i32> %r = insertelement <4 x float> %splat, float %x, i32 %y ret <4 x float> %r } @@ -563,23 +563,23 @@ define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) { define <4 x float> @insert_in_nonsplat2(float %x, <4 x float> %y) { ; CHECK-LABEL: @insert_in_nonsplat2( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x 
float> [[SPLAT]], float [[X]], i32 3 ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> %y, float %x, i32 0 - %splat = shufflevector <4 x float> %xv, <4 x float> undef, <4 x i32> + %splat = shufflevector <4 x float> %xv, <4 x float> poison, <4 x i32> %r = insertelement <4 x float> %splat, float %x, i32 3 ret <4 x float> %r } define <4 x i8> @shuf_identity_padding(<2 x i8> %x, i8 %y) { ; CHECK-LABEL: @shuf_identity_padding( -; CHECK-NEXT: [[V1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> +; CHECK-NEXT: [[V1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i32 2 ; CHECK-NEXT: ret <4 x i8> [[V2]] ; - %v0 = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> + %v0 = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> %x1 = extractelement <2 x i8> %x, i32 1 %v1 = insertelement <4 x i8> %v0, i8 %x1, i32 1 %v2 = insertelement <4 x i8> %v1, i8 %y, i32 2 @@ -588,11 +588,11 @@ define <4 x i8> @shuf_identity_padding(<2 x i8> %x, i8 %y) { define <3 x i8> @shuf_identity_extract(<4 x i8> %x, i8 %y) { ; CHECK-LABEL: @shuf_identity_extract( -; CHECK-NEXT: [[V1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <3 x i32> +; CHECK-NEXT: [[V1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <3 x i32> ; CHECK-NEXT: [[V2:%.*]] = insertelement <3 x i8> [[V1]], i8 [[Y:%.*]], i32 2 ; CHECK-NEXT: ret <3 x i8> [[V2]] ; - %v0 = shufflevector <4 x i8> %x, <4 x i8> undef, <3 x i32> + %v0 = shufflevector <4 x i8> %x, <4 x i8> poison, <3 x i32> %x1 = extractelement <4 x i8> %x, i32 1 %v1 = insertelement <3 x i8> %v0, i8 %x1, i32 1 %v2 = insertelement <3 x i8> %v1, i8 %y, i32 2 @@ -601,13 +601,13 @@ define <3 x i8> @shuf_identity_extract(<4 x i8> %x, i8 %y) { define <4 x float> @shuf_identity_extract_extra_use(<6 x float> %x, float %y) { ; CHECK-LABEL: @shuf_identity_extract_extra_use( -; CHECK-NEXT: [[V0:%.*]] = shufflevector <6 x float> [[X:%.*]], <6 x float> undef, <4 x i32> +; CHECK-NEXT: [[V0:%.*]] = shufflevector <6 x float> [[X:%.*]], <6 x float> poison, <4 x i32> ; CHECK-NEXT: call void @use(<4 x float> [[V0]]) -; CHECK-NEXT: [[V1:%.*]] = shufflevector <6 x float> [[X]], <6 x float> undef, <4 x i32> +; CHECK-NEXT: [[V1:%.*]] = shufflevector <6 x float> [[X]], <6 x float> poison, <4 x i32> ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[Y:%.*]], i32 1 ; CHECK-NEXT: ret <4 x float> [[V2]] ; - %v0 = shufflevector <6 x float> %x, <6 x float> undef, <4 x i32> + %v0 = shufflevector <6 x float> %x, <6 x float> poison, <4 x i32> call void @use(<4 x float> %v0) %x1 = extractelement <6 x float> %x, i32 2 %v1 = insertelement <4 x float> %v0, float %x1, i32 2 @@ -619,13 +619,13 @@ define <4 x float> @shuf_identity_extract_extra_use(<6 x float> %x, float %y) { define <4 x i8> @shuf_identity_padding_variable_index(<2 x i8> %x, i8 %y, i32 %index) { ; CHECK-LABEL: @shuf_identity_padding_variable_index( -; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> +; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x i8> [[X]], i32 [[INDEX:%.*]] ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i32 [[INDEX]] ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i32 2 ; CHECK-NEXT: ret <4 x i8> [[V2]] ; - %v0 = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> + %v0 = shufflevector <2 x i8> %x, <2 x i8> poison, <4 
x i32> %x1 = extractelement <2 x i8> %x, i32 %index %v1 = insertelement <4 x i8> %v0, i8 %x1, i32 %index %v2 = insertelement <4 x i8> %v1, i8 %y, i32 2 @@ -636,13 +636,13 @@ define <4 x i8> @shuf_identity_padding_variable_index(<2 x i8> %x, i8 %y, i32 %i define <4 x i8> @shuf_identity_padding_wrong_source_vec(<2 x i8> %x, i8 %y, <2 x i8> %other) { ; CHECK-LABEL: @shuf_identity_padding_wrong_source_vec( -; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> +; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x i8> [[OTHER:%.*]], i32 1 ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i32 1 ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i32 2 ; CHECK-NEXT: ret <4 x i8> [[V2]] ; - %v0 = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> + %v0 = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> %x1 = extractelement <2 x i8> %other, i32 1 %v1 = insertelement <4 x i8> %v0, i8 %x1, i32 1 %v2 = insertelement <4 x i8> %v1, i8 %y, i32 2 @@ -653,13 +653,13 @@ define <4 x i8> @shuf_identity_padding_wrong_source_vec(<2 x i8> %x, i8 %y, <2 x define <4 x i8> @shuf_identity_padding_wrong_index(<2 x i8> %x, i8 %y) { ; CHECK-LABEL: @shuf_identity_padding_wrong_index( -; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> +; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x i8> [[X]], i32 1 ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i32 2 ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i32 3 ; CHECK-NEXT: ret <4 x i8> [[V2]] ; - %v0 = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> + %v0 = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> %x1 = extractelement <2 x i8> %x, i32 1 %v1 = insertelement <4 x i8> %v0, i8 %x1, i32 2 %v2 = insertelement <4 x i8> %v1, i8 %y, i32 3 @@ -729,7 +729,7 @@ define <4 x float> @splat_constant(<4 x float> %x) { ; CHECK-NEXT: ret <4 x float> [[R]] ; %ins3 = insertelement <4 x float> %x, float 3.0, i32 3 - %splat3 = shufflevector <4 x float> %ins3, <4 x float> undef, <4 x i32> + %splat3 = shufflevector <4 x float> %ins3, <4 x float> poison, <4 x i32> %r = fadd <4 x float> %ins3, %splat3 ret <4 x float> %r } diff --git a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll new file mode 100644 index 0000000..2f448ce --- /dev/null +++ b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll @@ -0,0 +1,637 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + + +define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[E:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[E]], i32 [[C:%.*]], i32 [[D:%.*]] +; CHECK-NEXT: ret i32 [[TMP1]] +; + %e = icmp slt i32 %a, %b + %f = sext i1 %e to i32 + %g = and i32 %c, %f + %h = xor i32 %f, -1 + %i = and i32 %d, %h + %j = or i32 %g, %i + ret i32 %j +} + +define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: @bar( +; CHECK-NEXT: [[E_NOT:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[E_NOT]], i32 [[C:%.*]], i32 [[D:%.*]] +; CHECK-NEXT: ret i32 [[TMP1]] +; + %e = icmp slt i32 %a, %b + %f = sext i1 %e to i32 + %g = and i32 %c, %f + %h = xor i32 
%f, -1 + %i = and i32 %d, %h + %j = or i32 %i, %g + ret i32 %j +} + +define i32 @goo(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: @goo( +; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[T0]], i32 [[C:%.*]], i32 [[D:%.*]] +; CHECK-NEXT: ret i32 [[TMP1]] +; + %t0 = icmp slt i32 %a, %b + %iftmp.0.0 = select i1 %t0, i32 -1, i32 0 + %t1 = and i32 %iftmp.0.0, %c + %not = xor i32 %iftmp.0.0, -1 + %t2 = and i32 %not, %d + %t3 = or i32 %t1, %t2 + ret i32 %t3 +} + +define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: @poo( +; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[T3:%.*]] = select i1 [[T0]], i32 [[C:%.*]], i32 [[D:%.*]] +; CHECK-NEXT: ret i32 [[T3]] +; + %t0 = icmp slt i32 %a, %b + %iftmp.0.0 = select i1 %t0, i32 -1, i32 0 + %t1 = and i32 %iftmp.0.0, %c + %iftmp = select i1 %t0, i32 0, i32 -1 + %t2 = and i32 %iftmp, %d + %t3 = or i32 %t1, %t2 + ret i32 %t3 +} + +; PR32791 - https://bugs.llvm.org//show_bug.cgi?id=32791 +; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this. + +define i32 @fold_inverted_icmp_preds(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: @fold_inverted_icmp_preds( +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[C:%.*]], i32 0 +; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp slt i32 [[A]], [[B]] +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2_NOT]], i32 0, i32 [[D:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]] +; CHECK-NEXT: ret i32 [[OR]] +; + %cmp1 = icmp slt i32 %a, %b + %sel1 = select i1 %cmp1, i32 %c, i32 0 + %cmp2 = icmp sge i32 %a, %b + %sel2 = select i1 %cmp2, i32 %d, i32 0 + %or = or i32 %sel1, %sel2 + ret i32 %or +} + +; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this. + +define i32 @fold_inverted_icmp_preds_reverse(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: @fold_inverted_icmp_preds_reverse( +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 [[C:%.*]] +; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp slt i32 [[A]], [[B]] +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2_NOT]], i32 [[D:%.*]], i32 0 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]] +; CHECK-NEXT: ret i32 [[OR]] +; + %cmp1 = icmp slt i32 %a, %b + %sel1 = select i1 %cmp1, i32 0, i32 %c + %cmp2 = icmp sge i32 %a, %b + %sel2 = select i1 %cmp2, i32 0, i32 %d + %or = or i32 %sel1, %sel2 + ret i32 %or +} + +; TODO: Should fcmp have the same sort of predicate canonicalization as icmp? + +define i32 @fold_inverted_fcmp_preds(float %a, float %b, i32 %c, i32 %d) { +; CHECK-LABEL: @fold_inverted_fcmp_preds( +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[C:%.*]], i32 0 +; CHECK-NEXT: [[CMP2:%.*]] = fcmp uge float [[A]], [[B]] +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 [[D:%.*]], i32 0 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]] +; CHECK-NEXT: ret i32 [[OR]] +; + %cmp1 = fcmp olt float %a, %b + %sel1 = select i1 %cmp1, i32 %c, i32 0 + %cmp2 = fcmp uge float %a, %b + %sel2 = select i1 %cmp2, i32 %d, i32 0 + %or = or i32 %sel1, %sel2 + ret i32 %or +} + +; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this. 
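+; Illustrative sketch of the canonical form: 'icmp ne' is rewritten as 'icmp eq' with
+; the select arms swapped, so both selects key on an identical 'icmp eq' that a later
+; CSE pass can merge.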
+ +define <2 x i32> @fold_inverted_icmp_vector_preds(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) { +; CHECK-LABEL: @fold_inverted_icmp_vector_preds( +; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq <2 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1_NOT]], <2 x i32> zeroinitializer, <2 x i32> [[C:%.*]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq <2 x i32> [[A]], [[B]] +; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> [[D:%.*]], <2 x i32> zeroinitializer +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: ret <2 x i32> [[OR]] +; + %cmp1 = icmp ne <2 x i32> %a, %b + %sel1 = select <2 x i1> %cmp1, <2 x i32> %c, <2 x i32> + %cmp2 = icmp eq <2 x i32> %a, %b + %sel2 = select <2 x i1> %cmp2, <2 x i32> %d, <2 x i32> + %or = or <2 x i32> %sel1, %sel2 + ret <2 x i32> %or +} + +define i32 @par(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: @par( +; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[T0]], i32 [[C:%.*]], i32 [[D:%.*]] +; CHECK-NEXT: ret i32 [[TMP1]] +; + %t0 = icmp slt i32 %a, %b + %iftmp.1.0 = select i1 %t0, i32 -1, i32 0 + %t1 = and i32 %iftmp.1.0, %c + %not = xor i32 %iftmp.1.0, -1 + %t2 = and i32 %not, %d + %t3 = or i32 %t1, %t2 + ret i32 %t3 +} + +; In the following tests (8 commutation variants), verify that a bitcast doesn't get +; in the way of a select transform. These bitcasts are common in SSE/AVX and possibly +; other vector code because of canonicalization to i64 elements for vectors. + +; The fptosi instructions are included to avoid commutation canonicalization based on +; operator weight. Using another cast operator ensures that both operands of all logic +; ops are equally weighted, and this ensures that we're testing all commutation +; possibilities. 
+ +define <2 x i64> @bitcast_select_swap0(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @bitcast_select_swap0( +; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP4]] +; + %sia = fptosi <2 x double> %a to <2 x i64> + %sib = fptosi <2 x double> %b to <2 x i64> + %sext = sext <4 x i1> %cmp to <4 x i32> + %bc1 = bitcast <4 x i32> %sext to <2 x i64> + %and1 = and <2 x i64> %bc1, %sia + %neg = xor <4 x i32> %sext, + %bc2 = bitcast <4 x i32> %neg to <2 x i64> + %and2 = and <2 x i64> %bc2, %sib + %or = or <2 x i64> %and1, %and2 + ret <2 x i64> %or +} + +define <2 x i64> @bitcast_select_swap1(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @bitcast_select_swap1( +; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP4]] +; + %sia = fptosi <2 x double> %a to <2 x i64> + %sib = fptosi <2 x double> %b to <2 x i64> + %sext = sext <4 x i1> %cmp to <4 x i32> + %bc1 = bitcast <4 x i32> %sext to <2 x i64> + %and1 = and <2 x i64> %bc1, %sia + %neg = xor <4 x i32> %sext, + %bc2 = bitcast <4 x i32> %neg to <2 x i64> + %and2 = and <2 x i64> %bc2, %sib + %or = or <2 x i64> %and2, %and1 + ret <2 x i64> %or +} + +define <2 x i64> @bitcast_select_swap2(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @bitcast_select_swap2( +; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP4]] +; + %sia = fptosi <2 x double> %a to <2 x i64> + %sib = fptosi <2 x double> %b to <2 x i64> + %sext = sext <4 x i1> %cmp to <4 x i32> + %bc1 = bitcast <4 x i32> %sext to <2 x i64> + %and1 = and <2 x i64> %bc1, %sia + %neg = xor <4 x i32> %sext, + %bc2 = bitcast <4 x i32> %neg to <2 x i64> + %and2 = and <2 x i64> %sib, %bc2 + %or = or <2 x i64> %and1, %and2 + ret <2 x i64> %or +} + +define <2 x i64> @bitcast_select_swap3(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @bitcast_select_swap3( +; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = 
bitcast <4 x i32> [[TMP3]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP4]] +; + %sia = fptosi <2 x double> %a to <2 x i64> + %sib = fptosi <2 x double> %b to <2 x i64> + %sext = sext <4 x i1> %cmp to <4 x i32> + %bc1 = bitcast <4 x i32> %sext to <2 x i64> + %and1 = and <2 x i64> %bc1, %sia + %neg = xor <4 x i32> %sext, + %bc2 = bitcast <4 x i32> %neg to <2 x i64> + %and2 = and <2 x i64> %sib, %bc2 + %or = or <2 x i64> %and2, %and1 + ret <2 x i64> %or +} + +define <2 x i64> @bitcast_select_swap4(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @bitcast_select_swap4( +; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP4]] +; + %sia = fptosi <2 x double> %a to <2 x i64> + %sib = fptosi <2 x double> %b to <2 x i64> + %sext = sext <4 x i1> %cmp to <4 x i32> + %bc1 = bitcast <4 x i32> %sext to <2 x i64> + %and1 = and <2 x i64> %sia, %bc1 + %neg = xor <4 x i32> %sext, + %bc2 = bitcast <4 x i32> %neg to <2 x i64> + %and2 = and <2 x i64> %bc2, %sib + %or = or <2 x i64> %and1, %and2 + ret <2 x i64> %or +} + +define <2 x i64> @bitcast_select_swap5(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @bitcast_select_swap5( +; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP4]] +; + %sia = fptosi <2 x double> %a to <2 x i64> + %sib = fptosi <2 x double> %b to <2 x i64> + %sext = sext <4 x i1> %cmp to <4 x i32> + %bc1 = bitcast <4 x i32> %sext to <2 x i64> + %and1 = and <2 x i64> %sia, %bc1 + %neg = xor <4 x i32> %sext, + %bc2 = bitcast <4 x i32> %neg to <2 x i64> + %and2 = and <2 x i64> %bc2, %sib + %or = or <2 x i64> %and2, %and1 + ret <2 x i64> %or +} + +define <2 x i64> @bitcast_select_swap6(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: @bitcast_select_swap6( +; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP4]] +; + %sia = fptosi <2 x double> %a to <2 x i64> + %sib = fptosi <2 x double> %b to <2 x i64> + %sext = sext <4 x i1> %cmp to <4 x i32> + %bc1 = bitcast <4 x i32> %sext to <2 x i64> + %and1 = and <2 x i64> %sia, %bc1 + %neg = xor <4 x i32> %sext, + %bc2 = bitcast <4 x i32> %neg to <2 x i64> + %and2 = and <2 x i64> %sib, %bc2 + %or = or <2 x i64> %and1, %and2 + ret <2 x i64> %or +} + +define <2 x i64> @bitcast_select_swap7(<4 x i1> %cmp, <2 x double> %a, <2 x 
double> %b) {
+; CHECK-LABEL: @bitcast_select_swap7(
+; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP4]]
+;
+  %sia = fptosi <2 x double> %a to <2 x i64>
+  %sib = fptosi <2 x double> %b to <2 x i64>
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+  %and1 = and <2 x i64> %sia, %bc1
+  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+  %and2 = and <2 x i64> %sib, %bc2
+  %or = or <2 x i64> %and2, %and1
+  ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @bitcast_select_multi_uses(
+; CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
+; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i64> [[BC1]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
+; CHECK-NEXT:    [[BC2:%.*]] = xor <2 x i64> [[TMP1]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[AND2:%.*]] = and <2 x i64> [[BC2]], [[B:%.*]]
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i64> [[OR]], [[ADD]]
+; CHECK-NEXT:    ret <2 x i64> [[SUB]]
+;
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+  %and1 = and <2 x i64> %a, %bc1
+  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+  %and2 = and <2 x i64> %b, %bc2
+  %or = or <2 x i64> %and2, %and1
+  %add = add <2 x i64> %and2, %bc2
+  %sub = sub <2 x i64> %or, %add
+  ret <2 x i64> %sub
+}
+
+define i1 @bools(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: @bools(
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %not = xor i1 %c, -1
+  %and1 = and i1 %not, %a
+  %and2 = and i1 %c, %b
+  %or = or i1 %and1, %and2
+  ret i1 %or
+}
+
+; Form a select if we know we can replace 2 simple logic ops.
+
+define i1 @bools_multi_uses1(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: @bools_multi_uses1(
+; CHECK-NEXT:    [[NOT:%.*]] = xor i1 [[C:%.*]], true
+; CHECK-NEXT:    [[AND1:%.*]] = and i1 [[NOT]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C]], i1 [[B:%.*]], i1 [[A]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i1 [[TMP1]], [[AND1]]
+; CHECK-NEXT:    ret i1 [[XOR]]
+;
+  %not = xor i1 %c, -1
+  %and1 = and i1 %not, %a
+  %and2 = and i1 %c, %b
+  %or = or i1 %and1, %and2
+  %xor = xor i1 %or, %and1
+  ret i1 %xor
+}
+
+; Don't replace a cheap logic op with a potentially expensive select
+; unless we can also eliminate one of the other original ops.
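+; Illustrative trade-off (a general note, not target-specific): a select may lower to
+; a branch or a blend, so trading a single cheap 'or' for it only pays off when at
+; least one of the other logic ops disappears too.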
+ +define i1 @bools_multi_uses2(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @bools_multi_uses2( +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]] +; CHECK-NEXT: ret i1 [[TMP1]] +; + %not = xor i1 %c, -1 + %and1 = and i1 %not, %a + %and2 = and i1 %c, %b + %or = or i1 %and1, %and2 + %add = add i1 %and1, %and2 + %and3 = and i1 %or, %add + ret i1 %and3 +} + +define <4 x i1> @vec_of_bools(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c) { +; CHECK-LABEL: @vec_of_bools( +; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[B:%.*]], <4 x i1> [[A:%.*]] +; CHECK-NEXT: ret <4 x i1> [[TMP1]] +; + %not = xor <4 x i1> %c, + %and1 = and <4 x i1> %not, %a + %and2 = and <4 x i1> %b, %c + %or = or <4 x i1> %and2, %and1 + ret <4 x i1> %or +} + +define i4 @vec_of_casted_bools(i4 %a, i4 %b, <4 x i1> %c) { +; CHECK-LABEL: @vec_of_casted_bools( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i4 [[A:%.*]] to <4 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i4 [[B:%.*]] to <4 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[TMP2]], <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 +; CHECK-NEXT: ret i4 [[TMP4]] +; + %not = xor <4 x i1> %c, + %bc1 = bitcast <4 x i1> %not to i4 + %bc2 = bitcast <4 x i1> %c to i4 + %and1 = and i4 %a, %bc1 + %and2 = and i4 %bc2, %b + %or = or i4 %and1, %and2 + ret i4 %or +} + +; Inverted 'and' constants mean this is a select which is canonicalized to a shuffle. + +define <4 x i32> @vec_sel_consts(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: @vec_sel_consts( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; + %and1 = and <4 x i32> %a, + %and2 = and <4 x i32> %b, + %or = or <4 x i32> %and1, %and2 + ret <4 x i32> %or +} + +define <3 x i129> @vec_sel_consts_weird(<3 x i129> %a, <3 x i129> %b) { +; CHECK-LABEL: @vec_sel_consts_weird( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i129> [[A:%.*]], <3 x i129> [[B:%.*]], <3 x i32> +; CHECK-NEXT: ret <3 x i129> [[TMP1]] +; + %and1 = and <3 x i129> %a, + %and2 = and <3 x i129> %b, + %or = or <3 x i129> %and2, %and1 + ret <3 x i129> %or +} + +; The mask elements must be inverted for this to be a select. + +define <4 x i32> @vec_not_sel_consts(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: @vec_not_sel_consts( +; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> [[B:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]] +; CHECK-NEXT: ret <4 x i32> [[OR]] +; + %and1 = and <4 x i32> %a, + %and2 = and <4 x i32> %b, + %or = or <4 x i32> %and1, %and2 + ret <4 x i32> %or +} + +define <4 x i32> @vec_not_sel_consts_undef_elts(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: @vec_not_sel_consts_undef_elts( +; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> [[A:%.*]], +; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> [[B:%.*]], +; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]] +; CHECK-NEXT: ret <4 x i32> [[OR]] +; + %and1 = and <4 x i32> %a, + %and2 = and <4 x i32> %b, + %or = or <4 x i32> %and1, %and2 + ret <4 x i32> %or +} + +; The inverted constants may be operands of xor instructions. 
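+; Illustrative bit math: when the two xor constants are bitwise complements A and ~A,
+; the results (%mask ^ A) and (%mask ^ ~A) are lane-wise complements of each other,
+; so they still act as the mask/not-mask pair of a select.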
+ +define <4 x i32> @vec_sel_xor(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) { +; CHECK-LABEL: @vec_sel_xor( +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[C:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]] +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; + %mask = sext <4 x i1> %c to <4 x i32> + %mask_flip1 = xor <4 x i32> %mask, + %not_mask_flip1 = xor <4 x i32> %mask, + %and1 = and <4 x i32> %not_mask_flip1, %a + %and2 = and <4 x i32> %mask_flip1, %b + %or = or <4 x i32> %and1, %and2 + ret <4 x i32> %or +} + +; Allow the transform even if the mask values have multiple uses because +; there's still a net reduction of instructions from removing the and/and/or. + +define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) { +; CHECK-LABEL: @vec_sel_xor_multi_use( +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[C:%.*]], +; CHECK-NEXT: [[MASK_FLIP1:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[C]], +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[TMP3]], [[MASK_FLIP1]] +; CHECK-NEXT: ret <4 x i32> [[ADD]] +; + %mask = sext <4 x i1> %c to <4 x i32> + %mask_flip1 = xor <4 x i32> %mask, + %not_mask_flip1 = xor <4 x i32> %mask, + %and1 = and <4 x i32> %not_mask_flip1, %a + %and2 = and <4 x i32> %mask_flip1, %b + %or = or <4 x i32> %and1, %and2 + %add = add <4 x i32> %or, %mask_flip1 + ret <4 x i32> %add +} + +; The 'ashr' guarantees that we have a bitmask, so this is select with truncated condition. + +define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) { +; CHECK-LABEL: @allSignBits( +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp slt i32 [[COND:%.*]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTNOT]], i32 [[TVAL:%.*]], i32 [[FVAL:%.*]] +; CHECK-NEXT: ret i32 [[TMP1]] +; + %bitmask = ashr i32 %cond, 31 + %not_bitmask = xor i32 %bitmask, -1 + %a1 = and i32 %tval, %bitmask + %a2 = and i32 %not_bitmask, %fval + %sel = or i32 %a1, %a2 + ret i32 %sel +} + +define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) { +; CHECK-LABEL: @allSignBits_vec( +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]] +; CHECK-NEXT: ret <4 x i8> [[TMP1]] +; + %bitmask = ashr <4 x i8> %cond, + %not_bitmask = xor <4 x i8> %bitmask, + %a1 = and <4 x i8> %tval, %bitmask + %a2 = and <4 x i8> %fval, %not_bitmask + %sel = or <4 x i8> %a2, %a1 + ret <4 x i8> %sel +} + +; Negative test - make sure that bitcasts from FP do not cause a crash. 
+
+define <2 x i64> @fp_bitcast(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @fp_bitcast(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[BC1:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i64> [[SIA]], [[BC1]]
+; CHECK-NEXT: [[BC2:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[SIB]], [[BC2]]
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
+; CHECK-NEXT: ret <2 x i64> [[OR]]
+;
+  %sia = fptosi <2 x double> %a to <2 x i64>
+  %sib = fptosi <2 x double> %b to <2 x i64>
+  %bc1 = bitcast <2 x double> %a to <2 x i64>
+  %and1 = and <2 x i64> %sia, %bc1
+  %bc2 = bitcast <2 x double> %b to <2 x i64>
+  %and2 = and <2 x i64> %sib, %bc2
+  %or = or <2 x i64> %and2, %and1
+  ret <2 x i64> %or
+}
+
+define <4 x i32> @computesignbits_through_shuffles(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; CHECK-LABEL: @computesignbits_through_shuffles(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ole <4 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32>
+; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32>
+; CHECK-NEXT: [[SHUF_OR1:%.*]] = or <4 x i32> [[S1]], [[S2]]
+; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32>
+; CHECK-NEXT: [[S4:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32>
+; CHECK-NEXT: [[SHUF_OR2:%.*]] = or <4 x i32> [[S3]], [[S4]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[SHUF_OR2]] to <4 x i1>
+; CHECK-NEXT: [[DOTV:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[Z:%.*]], <4 x float> [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[DOTV]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+  %cmp = fcmp ole <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %s1 = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32>
+  %s2 = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32>
+  %shuf_or1 = or <4 x i32> %s1, %s2
+  %s3 = shufflevector <4 x i32> %shuf_or1, <4 x i32> poison, <4 x i32>
+  %s4 = shufflevector <4 x i32> %shuf_or1, <4 x i32> poison, <4 x i32>
+  %shuf_or2 = or <4 x i32> %s3, %s4
+  %not_or2 = xor <4 x i32> %shuf_or2,
+  %xbc = bitcast <4 x float> %x to <4 x i32>
+  %zbc = bitcast <4 x float> %z to <4 x i32>
+  %and1 = and <4 x i32> %not_or2, %xbc
+  %and2 = and <4 x i32> %shuf_or2, %zbc
+  %sel = or <4 x i32> %and1, %and2
+  ret <4 x i32> %sel
+}
+
+define <4 x i32> @computesignbits_through_two_input_shuffle(<4 x i32> %x, <4 x i32> %y, <4 x i1> %cond1, <4 x i1> %cond2) {
+; CHECK-LABEL: @computesignbits_through_two_input_shuffle(
+; CHECK-NEXT: [[SEXT1:%.*]] = sext <4 x i1> [[COND1:%.*]] to <4 x i32>
+; CHECK-NEXT: [[SEXT2:%.*]] = sext <4 x i1> [[COND2:%.*]] to <4 x i32>
+; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i32> [[SEXT1]], <4 x i32> [[SEXT2]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[COND]] to <4 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]]
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+  %sext1 = sext <4 x i1> %cond1 to <4 x i32>
+  %sext2 = sext <4 x i1> %cond2 to <4 x i32>
+  %cond = shufflevector <4 x i32> %sext1, <4 x i32> %sext2, <4 x i32>
+  %notcond = xor <4 x i32> %cond,
+  %and1 = and <4 x i32> %notcond, %x
+  %and2 = and <4 x i32> %cond, %y
+  %sel = or <4 x i32> %and1, %and2
+  ret <4 x i32> %sel
+}
+
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll
index 4243e82..501dc70 100644
--- a/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll
@@ -202,13 +202,13 @@ define <4 x double> @gather_lane2(double* %base, double %pt) {
 ; CHECK-LABEL: @gather_lane2(
 ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64>
 ; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <4 x double> poison, double [[PT:%.*]], i64 0
-; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <4 x double> [[PT_V1]], <4 x double> undef, <4 x i32>
+; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <4 x double> [[PT_V1]], <4 x double> poison, <4 x i32>
 ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> , <4 x double> [[PT_V2]])
 ; CHECK-NEXT: ret <4 x double> [[RES]]
 ;
   %ptrs = getelementptr double, double *%base, <4 x i64>
   %pt_v1 = insertelement <4 x double> poison, double %pt, i64 0
-  %pt_v2 = shufflevector <4 x double> %pt_v1, <4 x double> undef, <4 x i32> zeroinitializer
+  %pt_v2 = shufflevector <4 x double> %pt_v1, <4 x double> poison, <4 x i32> zeroinitializer
   %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32 4, <4 x i1> , <4 x double> %pt_v2)
   ret <4 x double> %res
 }
diff --git a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
new file mode 100644
index 0000000..6027728
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
@@ -0,0 +1,1108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare i32 @llvm.abs.i32(i32, i1)
+
+define i32 @pow2_multiplier(i32 %A) {
+; CHECK-LABEL: @pow2_multiplier(
+; CHECK-NEXT: [[B:%.*]] = shl i32 [[A:%.*]], 1
+; CHECK-NEXT: ret i32 [[B]]
+;
+  %B = mul i32 %A, 2
+  ret i32 %B
+}
+
+define <2 x i32> @pow2_multiplier_vec(<2 x i32> %A) {
+; CHECK-LABEL: @pow2_multiplier_vec(
+; CHECK-NEXT: [[B:%.*]] = shl <2 x i32> [[A:%.*]],
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+  %B = mul <2 x i32> %A,
+  ret <2 x i32> %B
+}
+
+define i8 @combine_shl(i8 %A) {
+; CHECK-LABEL: @combine_shl(
+; CHECK-NEXT: [[C:%.*]] = shl i8 [[A:%.*]], 6
+; CHECK-NEXT: ret i8 [[C]]
+;
+  %B = mul i8 %A, 8
+  %C = mul i8 %B, 8
+  ret i8 %C
+}
+
+define i32 @neg(i32 %i) {
+; CHECK-LABEL: @neg(
+; CHECK-NEXT: [[T:%.*]] = sub i32 0, [[I:%.*]]
+; CHECK-NEXT: ret i32 [[T]]
+;
+  %t = mul i32 %i, -1
+  ret i32 %t
+}
+
+; Use the sign-bit as a mask:
+; (zext (A < 0)) * B --> (A >> 31) & B
+
+define i32 @test10(i32 %a, i32 %b) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+  %c = icmp slt i32 %a, 0
+  %d = zext i1 %c to i32
+  %e = mul i32 %d, %b
+  ret i32 %e
+}
+
+define i32 @test11(i32 %a, i32 %b) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+  %c = icmp sle i32 %a, -1
+  %d = zext i1 %c to i32
+  %e = mul i32 %d, %b
+  ret i32 %e
+}
+
+declare void @use32(i32)
+
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[A_LOBIT:%.*]] = lshr i32 [[A:%.*]], 31
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A]], 31
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: call void @use32(i32 [[A_LOBIT]])
+; CHECK-NEXT: ret i32 [[E]]
+;
+  %c = icmp ugt i32 %a, 2147483647
+  %d = zext i1 %c to i32
+  %e = mul i32 %d, %b
+  call void @use32(i32 %d)
+  ret i32 %e
+}
+
+; rdar://7293527
+define i32 @test15(i32 %A, i32 %B) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[M:%.*]] = shl i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+  %shl = shl i32 1, %B
+  %m = mul i32 %shl, %A
+  ret i32 %m
+}
+
+; X * Y (when Y is a boolean) --> Y ? X : 0
+
+define i32 @mul_bool(i32 %x, i1 %y) {
+; CHECK-LABEL: @mul_bool(
+; CHECK-NEXT: [[M:%.*]] = select i1 [[Y:%.*]], i32 [[X:%.*]], i32 0
+; CHECK-NEXT: ret i32 [[M]]
+;
+  %z = zext i1 %y to i32
+  %m = mul i32 %x, %z
+  ret i32 %m
+}
+
+; Commute and test vector type.
+
+define <2 x i32> @mul_bool_vec(<2 x i32> %x, <2 x i1> %y) {
+; CHECK-LABEL: @mul_bool_vec(
+; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[M]]
+;
+  %z = zext <2 x i1> %y to <2 x i32>
+  %m = mul <2 x i32> %x, %z
+  ret <2 x i32> %m
+}
+
+define <2 x i32> @mul_bool_vec_commute(<2 x i32> %x, <2 x i1> %y) {
+; CHECK-LABEL: @mul_bool_vec_commute(
+; CHECK-NEXT: [[M:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[M]]
+;
+  %z = zext <2 x i1> %y to <2 x i32>
+  %m = mul <2 x i32> %z, %x
+  ret <2 x i32> %m
+}
+
+define <3 x i7> @mul_bools(<3 x i1> %x, <3 x i1> %y) {
+; CHECK-LABEL: @mul_bools(
+; CHECK-NEXT: [[MULBOOL:%.*]] = and <3 x i1> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = zext <3 x i1> [[MULBOOL]] to <3 x i7>
+; CHECK-NEXT: ret <3 x i7> [[R]]
+;
+  %zx = zext <3 x i1> %x to <3 x i7>
+  %zy = zext <3 x i1> %y to <3 x i7>
+  %r = mul <3 x i7> %zx, %zy
+  ret <3 x i7> %r
+}
+
+define i32 @mul_bools_use1(i1 %x, i1 %y) {
+; CHECK-LABEL: @mul_bools_use1(
+; CHECK-NEXT: [[ZY:%.*]] = zext i1 [[Y:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[ZY]])
+; CHECK-NEXT: [[MULBOOL:%.*]] = and i1 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = zext i1 [[MULBOOL]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+  %zx = zext i1 %x to i32
+  %zy = zext i1 %y to i32
+  call void @use32(i32 %zy)
+  %r = mul i32 %zx, %zy
+  ret i32 %r
+}
+
+define i32 @mul_bools_use2(i1 %x, i1 %y) {
+; CHECK-LABEL: @mul_bools_use2(
+; CHECK-NEXT: [[ZY:%.*]] = zext i1 [[Y:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[ZY]])
+; CHECK-NEXT: [[MULBOOL:%.*]] = and i1 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = zext i1 [[MULBOOL]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+  %zx = zext i1 %x to i32
+  %zy = zext i1 %y to i32
+  call void @use32(i32 %zy)
+  %r = mul i32 %zy, %zx
+  ret i32 %r
+}
+
+define i32 @mul_bools_use3(i1 %x, i1 %y) {
+; CHECK-LABEL: @mul_bools_use3(
+; CHECK-NEXT: [[ZX:%.*]] = zext i1 [[X:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[ZX]])
+; CHECK-NEXT: [[ZY:%.*]] = zext i1 [[Y:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[ZY]])
+; CHECK-NEXT: [[R:%.*]] = select i1 [[X]], i32 [[ZY]], i32 0
+; CHECK-NEXT: ret i32 [[R]]
+;
+  %zx = zext i1 %x to i32
+  call void @use32(i32 %zx)
+  %zy = zext i1 %y to i32
+  call void @use32(i32 %zy)
+  %r = mul i32 %zx, %zy
+  ret i32 %r
+}
+
+define <3 x i32> @mul_bools_sext(<3 x i1> %x, <3 x i1> %y) {
+; CHECK-LABEL: @mul_bools_sext(
+; CHECK-NEXT: [[MULBOOL:%.*]] = and <3 x i1> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = zext <3 x i1> [[MULBOOL]] to <3 x i32>
+; CHECK-NEXT: ret <3 x i32> [[R]]
+;
+  %sx = sext <3 x i1> %x to <3 x i32>
+  %sy = sext <3 x i1> %y to <3 x i32>
+  %r = mul <3 x i32> %sx, %sy
+  ret <3 x i32> %r
+}
+
+define i32 @mul_bools_sext_use1(i1 %x, i1 %y) {
+; CHECK-LABEL: @mul_bools_sext_use1(
+; CHECK-NEXT: [[SY:%.*]] = sext i1 [[Y:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[SY]])
+; CHECK-NEXT: [[MULBOOL:%.*]] = and i1 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = zext i1 [[MULBOOL]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+  %sx = sext i1 %x to i32
+  %sy = sext i1 %y to i32
+  call void @use32(i32 %sy)
+  %r = mul i32 %sx, %sy
+  ret i32 %r
+}
+
+define i32 @mul_bools_sext_use2(i1 %x, i1 %y) {
+; CHECK-LABEL: @mul_bools_sext_use2(
+; CHECK-NEXT: [[SY:%.*]] = sext i1 [[Y:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[SY]])
+; CHECK-NEXT: [[MULBOOL:%.*]] = and i1 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = zext i1 [[MULBOOL]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+  %sx = sext i1 %x to i32
+  %sy = sext i1 %y to i32
+  call void @use32(i32 %sy)
+  %r = mul i32 %sy, %sx
+  ret i32 %r
+}
+
+define i32 @mul_bools_sext_use3(i1 %x, i1 %y) {
+; CHECK-LABEL: @mul_bools_sext_use3(
+; CHECK-NEXT: [[SX:%.*]] = sext i1 [[X:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[SX]])
+; CHECK-NEXT: [[SY:%.*]] = sext i1 [[Y:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[SY]])
+; CHECK-NEXT: [[R:%.*]] = mul nsw i32 [[SY]], [[SX]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+  %sx = sext i1 %x to i32
+  call void @use32(i32 %sx)
+  %sy = sext i1 %y to i32
+  call void @use32(i32 %sy)
+  %r = mul i32 %sy, %sx
+  ret i32 %r
+}
+
+define <3 x i32> @mul_bools_mixed_ext(<3 x i1> %x, <3 x i1> %y) {
+; CHECK-LABEL: @mul_bools_mixed_ext(
+; CHECK-NEXT: [[MULBOOL:%.*]] = and <3 x i1> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = sext <3 x i1> [[MULBOOL]] to <3 x i32>
+; CHECK-NEXT: ret <3 x i32> [[R]]
+;
+  %zx = zext <3 x i1> %x to <3 x i32>
+  %sy = sext <3 x i1> %y to <3 x i32>
+  %r = mul <3 x i32> %zx, %sy
+  ret <3 x i32> %r
+}
+
+define i32 @mul_bools_mixed_ext_use1(i1 %x, i1 %y) {
+; CHECK-LABEL: @mul_bools_mixed_ext_use1(
+; CHECK-NEXT: [[ZY:%.*]] = zext i1 [[Y:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[ZY]])
+; CHECK-NEXT: [[MULBOOL:%.*]] = and i1 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = sext i1 [[MULBOOL]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+  %sx = sext i1 %x to i32
+  %zy = zext i1 %y to i32
+  call void @use32(i32 %zy)
+  %r = mul i32 %sx, %zy
+  ret i32 %r
+}
+
+define i32 @mul_bools_mixed_ext_use2(i1 %x, i1 %y) {
+; CHECK-LABEL: @mul_bools_mixed_ext_use2(
+; CHECK-NEXT: [[SY:%.*]] = sext i1 [[Y:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[SY]])
+; CHECK-NEXT: [[MULBOOL:%.*]] = and i1 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = sext i1 [[MULBOOL]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+  %zx = zext i1 %x to i32
+  %sy = sext i1 %y to i32
+  call void @use32(i32 %sy)
+  %r = mul i32 %sy, %zx
+  ret i32 %r
+}
+
+define i32 @mul_bools_mixed_ext_use3(i1 %x, i1 %y) {
+; CHECK-LABEL: @mul_bools_mixed_ext_use3(
+; CHECK-NEXT: [[SX:%.*]] = sext i1 [[X:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[SX]])
+; CHECK-NEXT: [[ZY:%.*]] = zext i1 [[Y:%.*]] to i32
+; CHECK-NEXT: call void @use32(i32 [[ZY]])
+; CHECK-NEXT: [[R:%.*]] = select i1 [[Y]], i32 [[SX]], i32 0
+; CHECK-NEXT: ret i32 [[R]]
+;
+  %sx = sext i1 %x to i32
+  call void @use32(i32 %sx)
+  %zy = zext i1 %y to i32
+  call void @use32(i32 %zy)
+  %r = mul i32 %zy, %sx
+  ret i32 %r
+}
+
+; (A >>u 31) * B --> (A >>s 31) & B
+
+define i32 @signbit_mul(i32 %a, i32 %b) {
+; CHECK-LABEL: @signbit_mul(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+  %d = lshr i32 %a, 31
+  %e = mul i32 %d, %b
+  ret i32 %e
+}
+
+define i32 @signbit_mul_commute_extra_use(i32 %a, i32 %b) {
+; CHECK-LABEL: @signbit_mul_commute_extra_use(
+; CHECK-NEXT: [[D:%.*]] = lshr i32 [[A:%.*]], 31
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A]], 31
+; CHECK-NEXT: [[E:%.*]] = and i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: call void @use32(i32 [[D]])
+; CHECK-NEXT: ret i32 [[E]]
+;
+  %d = lshr i32 %a, 31
+  %e = mul i32 %b, %d
+  call void @use32(i32 %d)
+  ret i32 %e
+}
+
+; (A >>u 31) * B --> (A >>s 31) & B
+
+define <2 x i32> @signbit_mul_vec(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @signbit_mul_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]],
+; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[E]]
+;
+  %d = lshr <2 x i32> %a,
+  %e = mul <2 x i32> %d, %b
+  ret <2 x i32> %e
+}
+
+define <2 x i32> @signbit_mul_vec_commute(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @signbit_mul_vec_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[A:%.*]],
+; CHECK-NEXT: [[E:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i32> [[E]]
+;
+  %d = lshr <2 x i32> %a,
+  %e = mul <2 x i32> %b, %d
+  ret <2 x i32> %e
+}
+
+define i32 @test18(i32 %A, i32 %B) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: ret i32 0
+;
+  %C = and i32 %A, 1
+  %D = and i32 %B, 1
+  %E = mul i32 %C, %D
+  %F = and i32 %E, 16
+  ret i32 %F
+}
+
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
+declare void @use(i1)
+
+define i32 @test19(i32 %A, i32 %B) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: call void @use(i1 false)
+; CHECK-NEXT: ret i32 0
+;
+  %C = and i32 %A, 1
+  %D = and i32 %B, 1
+
+; It would be nice if we also started proving that this doesn't overflow.
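+; (A sketch of why that would be sound, added here rather than taken from the
+; original test: %C and %D are each masked down to a single bit, so %C * %D is
+; at most 1 * 1 = 1, which cannot overflow i32, and the overflow bit could be
+; folded to false directly.)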
+  %E = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %C, i32 %D)
+  %F = extractvalue {i32, i1} %E, 0
+  %G = extractvalue {i32, i1} %E, 1
+  call void @use(i1 %G)
+  %H = and i32 %F, 16
+  ret i32 %H
+}
+
+define <2 x i64> @test20(<2 x i64> %A) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i64> [[A:%.*]],
+; CHECK-NEXT: [[C:%.*]] = add <2 x i64> [[TMP1]],
+; CHECK-NEXT: ret <2 x i64> [[C]]
+;
+  %B = add <2 x i64> %A,
+  %C = mul <2 x i64> %B,
+  ret <2 x i64> %C
+}
+
+define <2 x i1> @test21(<2 x i1> %A, <2 x i1> %B) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[C:%.*]] = and <2 x i1> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+  %C = mul <2 x i1> %A, %B
+  ret <2 x i1> %C
+}
+
+define i32 @test22(i32 %A) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[B:%.*]] = sub nsw i32 0, [[A:%.*]]
+; CHECK-NEXT: ret i32 [[B]]
+;
+  %B = mul nsw i32 %A, -1
+  ret i32 %B
+}
+
+define i32 @test23(i32 %A) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[C:%.*]] = mul nuw i32 [[A:%.*]], 6
+; CHECK-NEXT: ret i32 [[C]]
+;
+  %B = shl nuw i32 %A, 1
+  %C = mul nuw i32 %B, 3
+  ret i32 %C
+}
+
+define i32 @test24(i32 %A) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[C:%.*]] = mul nsw i32 [[A:%.*]], 6
+; CHECK-NEXT: ret i32 [[C]]
+;
+  %B = shl nsw i32 %A, 1
+  %C = mul nsw i32 %B, 3
+  ret i32 %C
+}
+
+define i32 @neg_neg_mul(i32 %A, i32 %B) {
+; CHECK-LABEL: @neg_neg_mul(
+; CHECK-NEXT: [[E:%.*]] = mul i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+  %C = sub i32 0, %A
+  %D = sub i32 0, %B
+  %E = mul i32 %C, %D
+  ret i32 %E
+}
+
+define i32 @neg_neg_mul_nsw(i32 %A, i32 %B) {
+; CHECK-LABEL: @neg_neg_mul_nsw(
+; CHECK-NEXT: [[E:%.*]] = mul nsw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[E]]
+;
+  %C = sub nsw i32 0, %A
+  %D = sub nsw i32 0, %B
+  %E = mul nsw i32 %C, %D
+  ret i32 %E
+}
+
+define i124 @neg_neg_mul_apint(i124 %A, i124 %B) {
+; CHECK-LABEL: @neg_neg_mul_apint(
+; CHECK-NEXT: [[E:%.*]] = mul i124 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i124 [[E]]
+;
+  %C = sub i124 0, %A
+  %D = sub i124 0, %B
+  %E = mul i124 %C, %D
+  ret i124 %E
+}
+
+define i32 @neg_mul_constant(i32 %A) {
+; CHECK-LABEL: @neg_mul_constant(
+; CHECK-NEXT: [[E:%.*]] = mul i32 [[A:%.*]], -7
+; CHECK-NEXT: ret i32 [[E]]
+;
+  %C = sub i32 0, %A
+  %E = mul i32 %C, 7
+  ret i32 %E
+}
+
+define i55 @neg_mul_constant_apint(i55 %A) {
+; CHECK-LABEL: @neg_mul_constant_apint(
+; CHECK-NEXT: [[E:%.*]] = mul i55 [[A:%.*]], -7
+; CHECK-NEXT: ret i55 [[E]]
+;
+  %C = sub i55 0, %A
+  %E = mul i55 %C, 7
+  ret i55 %E
+}
+
+define <3 x i8> @neg_mul_constant_vec(<3 x i8> %a) {
+; CHECK-LABEL: @neg_mul_constant_vec(
+; CHECK-NEXT: [[B:%.*]] = mul <3 x i8> [[A:%.*]],
+; CHECK-NEXT: ret <3 x i8> [[B]]
+;
+  %A = sub <3 x i8> zeroinitializer, %a
+  %B = mul <3 x i8> %A,
+  ret <3 x i8> %B
+}
+
+define <3 x i4> @neg_mul_constant_vec_weird(<3 x i4> %a) {
+; CHECK-LABEL: @neg_mul_constant_vec_weird(
+; CHECK-NEXT: [[B:%.*]] = mul <3 x i4> [[A:%.*]],
+; CHECK-NEXT: ret <3 x i4> [[B]]
+;
+  %A = sub <3 x i4> zeroinitializer, %a
+  %B = mul <3 x i4> %A,
+  ret <3 x i4> %B
+}
+
+define i32 @test26(i32 %A, i32 %B) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: [[D:%.*]] = shl nsw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+  %C = shl nsw i32 1, %B
+  %D = mul nsw i32 %A, %C
+  ret i32 %D
+}
+
+define i32 @test27(i32 %A, i32 %B) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[D:%.*]] = shl nuw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret i32 [[D]]
+;
+  %C = shl i32 1, %B
+  %D = mul nuw i32 %A, %C
+  ret i32 %D
+}
+
+define i32 @test28(i32 %A) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[B:%.*]] = shl i32 1, [[A:%.*]]
+; CHECK-NEXT: [[C:%.*]] = shl i32 [[B]], [[A]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+  %B = shl i32 1, %A
+  %C = mul nsw i32 %B, %B
+  ret i32 %C
+}
+
+define i64 @test29(i31 %A, i31 %B) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[C:%.*]] = sext i31 [[A:%.*]] to i64
+; CHECK-NEXT: [[D:%.*]] = sext i31 [[B:%.*]] to i64
+; CHECK-NEXT: [[E:%.*]] = mul nsw i64 [[C]], [[D]]
+; CHECK-NEXT: ret i64 [[E]]
+;
+  %C = sext i31 %A to i64
+  %D = sext i31 %B to i64
+  %E = mul i64 %C, %D
+  ret i64 %E
+}
+
+define i64 @test30(i32 %A, i32 %B) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[C:%.*]] = zext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[D:%.*]] = zext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[E:%.*]] = mul nuw i64 [[C]], [[D]]
+; CHECK-NEXT: ret i64 [[E]]
+;
+  %C = zext i32 %A to i64
+  %D = zext i32 %B to i64
+  %E = mul i64 %C, %D
+  ret i64 %E
+}
+
+@PR22087 = external global i32
+define i32 @test31(i32 %V) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[V:%.*]], zext (i1 icmp ne (i32* inttoptr (i64 1 to i32*), i32* @PR22087) to i32)
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+  %mul = mul i32 %V, shl (i32 1, i32 zext (i1 icmp ne (i32* inttoptr (i64 1 to i32*), i32* @PR22087) to i32))
+  ret i32 %mul
+}
+
+define i32 @test32(i32 %X) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[X:%.*]], 31
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+  %mul = mul nsw i32 %X, -2147483648
+  ret i32 %mul
+}
+
+define <2 x i32> @test32vec(<2 x i32> %X) {
+; CHECK-LABEL: @test32vec(
+; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[X:%.*]],
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+  %mul = mul nsw <2 x i32> %X,
+  ret <2 x i32> %mul
+}
+
+define i32 @test33(i32 %X) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[X:%.*]], 30
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+  %mul = mul nsw i32 %X, 1073741824
+  ret i32 %mul
+}
+
+define <2 x i32> @test33vec(<2 x i32> %X) {
+; CHECK-LABEL: @test33vec(
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]],
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+  %mul = mul nsw <2 x i32> %X,
+  ret <2 x i32> %mul
+}
+
+define i128 @test34(i128 %X) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: [[MUL:%.*]] = shl nsw i128 [[X:%.*]], 1
+; CHECK-NEXT: ret i128 [[MUL]]
+;
+  %mul = mul nsw i128 %X, 2
+  ret i128 %mul
+}
+
+define i32 @test_mul_canonicalize_op0(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_mul_canonicalize_op0(
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = sub i32 0, [[TMP1]]
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+  %neg = sub i32 0, %x
+  %mul = mul i32 %neg, %y
+  ret i32 %mul
+}
+
+define i32 @test_mul_canonicalize_op1(i32 %x, i32 %z) {
+; CHECK-LABEL: @test_mul_canonicalize_op1(
+; CHECK-NEXT: [[Y_NEG:%.*]] = mul i32 [[Z:%.*]], -3
+; CHECK-NEXT: [[DOTNEG:%.*]] = mul i32 [[Y_NEG]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[DOTNEG]]
+;
+  %y = mul i32 %z, 3
+  %neg = sub i32 0, %x
+  %mul = mul i32 %y, %neg
+  ret i32 %mul
+}
+
+define i32 @test_mul_canonicalize_nsw(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_mul_canonicalize_nsw(
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = sub i32 0, [[TMP1]]
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+  %neg = sub nsw i32 0, %x
+  %mul = mul nsw i32 %neg, %y
+  ret i32 %mul
+}
+
+define <2 x i32> @test_mul_canonicalize_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @test_mul_canonicalize_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = sub <2 x i32> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+  %neg = sub <2 x i32> , %x
+  %mul = mul <2 x i32> %neg, %y
+  ret <2 x i32> %mul
+}
+
+define i32 @test_mul_canonicalize_multiple_uses(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_mul_canonicalize_multiple_uses(
+; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[X:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[NEG]], [[Y:%.*]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[MUL]], [[NEG]]
+; CHECK-NEXT: ret i32 [[MUL2]]
+;
+  %neg = sub i32 0, %x
+  %mul = mul i32 %neg, %y
+  %mul2 = mul i32 %mul, %neg
+  ret i32 %mul2
+}
+
+@X = global i32 5
+
+define i64 @test_mul_canonicalize_neg_is_not_undone(i64 %L1) {
+; Check we do not undo the canonicalization of 0 - (X * Y), if Y is a constant
+; expr.
+; CHECK-LABEL: @test_mul_canonicalize_neg_is_not_undone(
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[L1:%.*]], ptrtoint (i32* @X to i64)
+; CHECK-NEXT: [[B4:%.*]] = sub i64 0, [[TMP1]]
+; CHECK-NEXT: ret i64 [[B4]]
+;
+  %v1 = ptrtoint i32* @X to i64
+  %B8 = sub i64 0, %v1
+  %B4 = mul i64 %B8, %L1
+  ret i64 %B4
+}
+
+define i32 @negate_if_true(i32 %x, i1 %cond) {
+; CHECK-LABEL: @negate_if_true(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND:%.*]], i32 [[TMP1]], i32 [[X]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+  %sel = select i1 %cond, i32 -1, i32 1
+  %r = mul i32 %sel, %x
+  ret i32 %r
+}
+
+define i32 @negate_if_false(i32 %x, i1 %cond) {
+; CHECK-LABEL: @negate_if_false(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND:%.*]], i32 [[X]], i32 [[TMP1]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
+  %sel = select i1 %cond, i32 1, i32 -1
+  %r = mul i32 %sel, %x
+  ret i32 %r
+}
+
+define <2 x i8> @negate_if_true_commute(<2 x i8> %px, i1 %cond) {
+; CHECK-LABEL: @negate_if_true_commute(
+; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[PX:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i8> zeroinitializer, [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[X]]
+; CHECK-NEXT: ret <2 x i8> [[TMP2]]
+;
+  %x = sdiv <2 x i8> , %px ; thwart complexity-based canonicalization
+  %sel = select i1 %cond, <2 x i8> , <2 x i8>
+  %r = mul <2 x i8> %x, %sel
+  ret <2 x i8> %r
+}
+
+define <2 x i8> @negate_if_false_commute(<2 x i8> %px, <2 x i1> %cond) {
+; CHECK-LABEL: @negate_if_false_commute(
+; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[PX:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
+; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i8> [[X]], <2 x i8> [[TMP1]]
+; CHECK-NEXT: ret <2 x i8> [[TMP2]]
+;
+  %x = sdiv <2 x i8> , %px ; thwart complexity-based canonicalization
+  %sel = select <2 x i1> %cond, <2 x i8> , <2 x i8>
+  %r = mul <2 x i8> %x, %sel
+  ret <2 x i8> %r
+}
+
+; Negative test
+
+define i32 @negate_if_true_extra_use(i32 %x, i1 %cond) {
+; CHECK-LABEL: @negate_if_true_extra_use(
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], i32 -1, i32 1
+; CHECK-NEXT: call void @use32(i32 [[SEL]])
+; CHECK-NEXT: [[R:%.*]] = mul i32 [[SEL]], [[X:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+  %sel = select i1 %cond, i32 -1, i32 1
+  call void @use32(i32 %sel)
+  %r = mul i32 %sel, %x
+  ret i32 %r
+}
+
+; Negative test
+
+define <2 x i8> @negate_if_true_wrong_constant(<2 x i8> %px, i1 %cond) {
+; CHECK-LABEL: @negate_if_true_wrong_constant(
+; CHECK-NEXT: [[X:%.*]] = sdiv <2 x i8> , [[PX:%.*]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x i8> , <2 x i8>
+; CHECK-NEXT: [[R:%.*]] = mul <2 x i8> [[X]], [[SEL]]
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+  %x = sdiv <2 x i8> , %px ; thwart complexity-based canonicalization
+  %sel = select i1 %cond, <2 x i8> , <2 x i8>
+  %r = mul <2 x i8> %x, %sel
+  ret <2 x i8> %r
+}
+
+; (C ? (X /exact Y) : 1) * Y -> C ? X : Y
+define i32 @mul_div_select(i32 %x, i32 %y, i1 %c) {
+; CHECK-LABEL: @mul_div_select(
+; CHECK-NEXT: [[MUL:%.*]] = select i1 [[C:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+  %div = udiv exact i32 %x, %y
+  %sel = select i1 %c, i32 %div, i32 1
+  %mul = mul i32 %sel, %y
+  ret i32 %mul
+}
+
+; fold mul(abs(x),abs(x)) -> mul(x,x)
+define i31 @combine_mul_abs_i31(i31 %0) {
+; CHECK-LABEL: @combine_mul_abs_i31(
+; CHECK-NEXT: [[M:%.*]] = mul i31 [[TMP0:%.*]], [[TMP0]]
+; CHECK-NEXT: ret i31 [[M]]
+;
+  %c = icmp slt i31 %0, 0
+  %s = sub nsw i31 0, %0
+  %r = select i1 %c, i31 %s, i31 %0
+  %m = mul i31 %r, %r
+  ret i31 %m
+}
+
+define i32 @combine_mul_abs_i32(i32 %0) {
+; CHECK-LABEL: @combine_mul_abs_i32(
+; CHECK-NEXT: [[M:%.*]] = mul i32 [[TMP0:%.*]], [[TMP0]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+  %c = icmp slt i32 %0, 0
+  %s = sub nsw i32 0, %0
+  %r = select i1 %c, i32 %s, i32 %0
+  %m = mul i32 %r, %r
+  ret i32 %m
+}
+
+define <4 x i32> @combine_mul_abs_v4i32(<4 x i32> %0) {
+; CHECK-LABEL: @combine_mul_abs_v4i32(
+; CHECK-NEXT: [[M:%.*]] = mul <4 x i32> [[TMP0:%.*]], [[TMP0]]
+; CHECK-NEXT: ret <4 x i32> [[M]]
+;
+  %c = icmp slt <4 x i32> %0, zeroinitializer
+  %s = sub nsw <4 x i32> zeroinitializer, %0
+  %r = select <4 x i1> %c, <4 x i32> %s, <4 x i32> %0
+  %m = mul <4 x i32> %r, %r
+  ret <4 x i32> %m
+}
+
+; fold mul(nabs(x),nabs(x)) -> mul(x,x)
+define i31 @combine_mul_nabs_i31(i31 %0) {
+; CHECK-LABEL: @combine_mul_nabs_i31(
+; CHECK-NEXT: [[M:%.*]] = mul i31 [[TMP0:%.*]], [[TMP0]]
+; CHECK-NEXT: ret i31 [[M]]
+;
+  %c = icmp slt i31 %0, 0
+  %s = sub nsw i31 0, %0
+  %r = select i1 %c, i31 %0, i31 %s
+  %m = mul i31 %r, %r
+  ret i31 %m
+}
+
+define i32 @combine_mul_nabs_i32(i32 %0) {
+; CHECK-LABEL: @combine_mul_nabs_i32(
+; CHECK-NEXT: [[M:%.*]] = mul i32 [[TMP0:%.*]], [[TMP0]]
+; CHECK-NEXT: ret i32 [[M]]
+;
+  %c = icmp slt i32 %0, 0
+  %s = sub nsw i32 0, %0
+  %r = select i1 %c, i32 %0, i32 %s
+  %m = mul i32 %r, %r
+  ret i32 %m
+}
+
+define <4 x i32> @combine_mul_nabs_v4i32(<4 x i32> %0) {
+; CHECK-LABEL: @combine_mul_nabs_v4i32(
+; CHECK-NEXT: [[M:%.*]] = mul <4 x i32> [[TMP0:%.*]], [[TMP0]]
+; CHECK-NEXT: ret <4 x i32> [[M]]
+;
+  %c = icmp slt <4 x i32> %0, zeroinitializer
+  %s = sub nsw <4 x i32> zeroinitializer, %0
+  %r = select <4 x i1> %c, <4 x i32> %0, <4 x i32> %s
+  %m = mul <4 x i32> %r, %r
+  ret <4 x i32> %m
+}
+
+define i32 @combine_mul_abs_intrin(i32 %x) {
+; CHECK-LABEL: @combine_mul_abs_intrin(
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X:%.*]], [[X]]
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+  %abs = call i32 @llvm.abs.i32(i32 %x, i1 false)
+  %mul = mul i32 %abs, %abs
+  ret i32 %mul
+}
+
+define i32 @combine_mul_nabs_intrin(i32 %x) {
+; CHECK-LABEL: @combine_mul_nabs_intrin(
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X:%.*]], [[X]]
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+  %abs = call i32 @llvm.abs.i32(i32 %x, i1 false)
+  %neg = sub i32 0, %abs
+  %mul = mul i32 %neg, %neg
+  ret i32 %mul
+}
+
+; z * splat(0) = splat(0), even for scalable vectors
+define @mul_scalable_splat_zero( %z) {
+; CHECK-LABEL: @mul_scalable_splat_zero(
+; CHECK-NEXT: ret zeroinitializer
+;
+  %shuf = shufflevector insertelement ( undef, i64 0, i32 0), poison, zeroinitializer
+  %t3 = mul %shuf, %z
+  ret %t3
+}
+
+;
+; fold mul(sub(x,y),negpow2) -> shl(sub(y,x),log2(pow2))
+;
+
+define i32 @mulsub1(i32 %a0, i32 %a1) {
+; CHECK-LABEL: @mulsub1(
+; CHECK-NEXT: [[SUB_NEG:%.*]] = sub i32 [[A0:%.*]], [[A1:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[SUB_NEG]], 2
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+  %sub = sub i32 %a1, %a0
+  %mul = mul i32 %sub, -4
+  ret i32 %mul
+}
+
+define <2 x i32> @mulsub1_vec(<2 x i32> %a0, <2 x i32> %a1) {
+; CHECK-LABEL: @mulsub1_vec(
+; CHECK-NEXT: [[SUB_NEG:%.*]] = sub <2 x i32> [[A0:%.*]], [[A1:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]],
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+  %sub = sub <2 x i32> %a1, %a0
+  %mul = mul <2 x i32> %sub,
+  ret <2 x i32> %mul
+}
+
+define <2 x i32> @mulsub1_vec_nonuniform(<2 x i32> %a0, <2 x i32> %a1) {
+; CHECK-LABEL: @mulsub1_vec_nonuniform(
+; CHECK-NEXT: [[SUB_NEG:%.*]] = sub <2 x i32> [[A0:%.*]], [[A1:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]],
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+  %sub = sub <2 x i32> %a1, %a0
+  %mul = mul <2 x i32> %sub,
+  ret <2 x i32> %mul
+}
+
+define <2 x i32> @mulsub1_vec_nonuniform_undef(<2 x i32> %a0, <2 x i32> %a1) {
+; CHECK-LABEL: @mulsub1_vec_nonuniform_undef(
+; CHECK-NEXT: [[SUB_NEG:%.*]] = sub <2 x i32> [[A0:%.*]], [[A1:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]],
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+  %sub = sub <2 x i32> %a1, %a0
+  %mul = mul <2 x i32> %sub,
+  ret <2 x i32> %mul
+}
+
+define i32 @mulsub2(i32 %a0) {
+; CHECK-LABEL: @mulsub2(
+; CHECK-NEXT: [[SUB_NEG:%.*]] = shl i32 [[A0:%.*]], 2
+; CHECK-NEXT: [[MUL:%.*]] = add i32 [[SUB_NEG]], -64
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+  %sub = sub i32 16, %a0
+  %mul = mul i32 %sub, -4
+  ret i32 %mul
+}
+
+define <2 x i32> @mulsub2_vec(<2 x i32> %a0) {
+; CHECK-LABEL: @mulsub2_vec(
+; CHECK-NEXT: [[SUB_NEG:%.*]] = shl <2 x i32> [[A0:%.*]],
+; CHECK-NEXT: [[MUL:%.*]] = add <2 x i32> [[SUB_NEG]],
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+  %sub = sub <2 x i32> , %a0
+  %mul = mul <2 x i32> %sub,
+  ret <2 x i32> %mul
+}
+
+define <2 x i32> @mulsub2_vec_nonuniform(<2 x i32> %a0) {
+; CHECK-LABEL: @mulsub2_vec_nonuniform(
+; CHECK-NEXT: [[SUB_NEG:%.*]] = add <2 x i32> [[A0:%.*]],
+; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]],
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+  %sub = sub <2 x i32> , %a0
+  %mul = mul <2 x i32> %sub,
+  ret <2 x i32> %mul
+}
+
+define <2 x i32> @mulsub2_vec_nonuniform_undef(<2 x i32> %a0) {
+; CHECK-LABEL: @mulsub2_vec_nonuniform_undef(
+; CHECK-NEXT: [[SUB_NEG:%.*]] = add <2 x i32> [[A0:%.*]],
+; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]],
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+  %sub = sub <2 x i32> , %a0
+  %mul = mul <2 x i32> %sub,
+  ret <2 x i32> %mul
+}
+
+define i32 @muladd2(i32 %a0) {
+; CHECK-LABEL: @muladd2(
+; CHECK-NEXT: [[ADD_NEG_NEG:%.*]] = mul i32 [[A0:%.*]], -4
+; CHECK-NEXT: [[MUL:%.*]] = add i32 [[ADD_NEG_NEG]], -64
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+  %add = add i32 %a0, 16
+  %mul = mul i32 %add, -4
+  ret i32 %mul
+}
+
+define <2 x i32> @muladd2_vec(<2 x i32> %a0) {
+; CHECK-LABEL: @muladd2_vec(
+; CHECK-NEXT: [[ADD_NEG_NEG:%.*]] = mul <2 x i32> [[A0:%.*]],
+; CHECK-NEXT: [[MUL:%.*]] = add <2 x i32> [[ADD_NEG_NEG]],
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+  %add = add <2 x i32> %a0,
+  %mul = mul <2 x i32> %add,
+  ret <2 x i32> %mul
+}
+
+define <2 x i32> @muladd2_vec_nonuniform(<2 x i32> %a0) {
+; CHECK-LABEL: @muladd2_vec_nonuniform(
+; CHECK-NEXT: [[ADD_NEG:%.*]] = sub <2 x i32> , [[A0:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[ADD_NEG]],
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+  %add = add <2 x i32> %a0,
+  %mul = mul <2 x i32> %add,
+  ret <2 x i32> %mul
+}
+
+define <2 x i32> @muladd2_vec_nonuniform_undef(<2 x i32> %a0) {
+; CHECK-LABEL: @muladd2_vec_nonuniform_undef(
+; CHECK-NEXT: [[ADD_NEG:%.*]] = sub <2 x i32> , [[A0:%.*]]
+; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[ADD_NEG]],
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+  %add = add <2 x i32> %a0,
+  %mul = mul <2 x i32> %add,
+  ret <2 x i32> %mul
+}
+
+define i32 @mulmuladd2(i32 %a0, i32 %a1) {
+; CHECK-LABEL: @mulmuladd2(
+; CHECK-NEXT: [[ADD_NEG:%.*]] = sub i32 -16, [[A0:%.*]]
+; CHECK-NEXT: [[MUL1_NEG:%.*]] = mul i32 [[ADD_NEG]], [[A1:%.*]]
+; CHECK-NEXT: [[MUL2:%.*]] = shl i32 [[MUL1_NEG]], 2
+; CHECK-NEXT: ret i32 [[MUL2]]
+;
+  %add = add i32 %a0, 16
+  %mul1 = mul i32 %add, %a1
+  %mul2 = mul i32 %mul1, -4
+  ret i32 %mul2
+}
+define i32 @mulmuladd2_extrause0(i32 %a0, i32 %a1) {
+; CHECK-LABEL: @mulmuladd2_extrause0(
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A0:%.*]], 16
+; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[ADD]], [[A1:%.*]]
+; CHECK-NEXT: call void @use32(i32 [[MUL1]])
+; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[MUL1]], -4
+; CHECK-NEXT: ret i32 [[MUL2]]
+;
+  %add = add i32 %a0, 16
+  %mul1 = mul i32 %add, %a1
+  call void @use32(i32 %mul1)
+  %mul2 = mul i32 %mul1, -4
+  ret i32 %mul2
+}
+define i32 @mulmuladd2_extrause1(i32 %a0, i32 %a1) {
+; CHECK-LABEL: @mulmuladd2_extrause1(
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A0:%.*]], 16
+; CHECK-NEXT: call void @use32(i32 [[ADD]])
+; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[ADD]], [[A1:%.*]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[MUL1]], -4
+; CHECK-NEXT: ret i32 [[MUL2]]
+;
+  %add = add i32 %a0, 16
+  call void @use32(i32 %add)
+  %mul1 = mul i32 %add, %a1
+  %mul2 = mul i32 %mul1, -4
+  ret i32 %mul2
+}
+define i32 @mulmuladd2_extrause2(i32 %a0, i32 %a1) {
+; CHECK-LABEL: @mulmuladd2_extrause2(
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A0:%.*]], 16
+; CHECK-NEXT: call void @use32(i32 [[ADD]])
+; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[ADD]], [[A1:%.*]]
+; CHECK-NEXT: call void @use32(i32 [[MUL1]])
+; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[MUL1]], -4
+; CHECK-NEXT: ret i32 [[MUL2]]
+;
+  %add = add i32 %a0, 16
+  call void @use32(i32 %add)
+  %mul1 = mul i32 %add, %a1
+  call void @use32(i32 %mul1)
+  %mul2 = mul i32 %mul1, -4
+  ret i32 %mul2
+}
+
+define i32 @mulnot(i32 %a0) {
+; CHECK-LABEL: @mulnot(
+; CHECK-NEXT: [[ADD_NEG:%.*]] = shl i32 [[A0:%.*]], 2
+; CHECK-NEXT: [[MUL:%.*]] = add i32 [[ADD_NEG]], 4
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+  %add = xor i32 %a0, -1
+  %mul = mul i32 %add, -4
+  ret i32 %mul
+}
+define i32 @mulnot_extrause(i32 %a0) {
+; CHECK-LABEL: @mulnot_extrause(
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A0:%.*]], -1
+; CHECK-NEXT: call void @use32(i32 [[NOT]])
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[NOT]], -4
+; CHECK-NEXT: ret i32 [[MUL]]
+;
+  %not = xor i32 %a0, -1
+  call void @use32(i32 %not)
+  %mul = mul i32 %not, -4
+  ret i32 %mul
+}
diff --git a/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll b/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll
new file mode 100644
index 0000000..e64977c
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @sub1(i32 %x) {
+; CHECK-LABEL: @sub1(
+; CHECK-NEXT: [[Y:%.*]] = sub i32 0, [[X:%.*]]
+; CHECK-NEXT: [[Z:%.*]] = sdiv i32 [[Y]], 337
+; CHECK-NEXT: ret i32 [[Z]]
+;
+  %y = sub i32 0, %x
+  %z = sdiv i32 %y, 337
+  ret i32 %z
+}
+
+define i32 @sub2(i32 %x) {
+; CHECK-LABEL: @sub2(
+; CHECK-NEXT: [[Z:%.*]] = sdiv i32 [[X:%.*]], -337
+; CHECK-NEXT: ret i32 [[Z]]
+;
+  %y = sub nsw i32 0, %x
+  %z = sdiv i32 %y, 337
+  ret i32 %z
+}
+
+define i1 @shl_icmp(i64 %X) {
+; CHECK-LABEL: @shl_icmp(
+; CHECK-NEXT: [[B:%.*]] = icmp eq i64 [[X:%.*]], 0
+; CHECK-NEXT: ret i1 [[B]]
+;
+  %A = shl nuw i64 %X, 2 ; X/4
+  %B = icmp eq i64 %A, 0
+  ret i1 %B
+}
+
+define i64 @shl1(i64 %X, i64* %P) {
+; CHECK-LABEL: @shl1(
+; CHECK-NEXT: [[A:%.*]] = and i64 [[X:%.*]], 312
+; CHECK-NEXT: store i64 [[A]], i64* [[P:%.*]], align 4
+; CHECK-NEXT: [[B:%.*]] = shl nuw nsw i64 [[A]], 8
+; CHECK-NEXT: ret i64 [[B]]
+;
+  %A = and i64 %X, 312
+  store i64 %A, i64* %P ; multiple uses of A.
+  %B = shl i64 %A, 8
+  ret i64 %B
+}
+
+define i32 @preserve1(i32 %x) {
+; CHECK-LABEL: @preserve1(
+; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[X:%.*]], 5
+; CHECK-NEXT: ret i32 [[ADD3]]
+;
+  %add = add nsw i32 %x, 2
+  %add3 = add nsw i32 %add, 3
+  ret i32 %add3
+}
+
+define i8 @nopreserve1(i8 %x) {
+; CHECK-LABEL: @nopreserve1(
+; CHECK-NEXT: [[ADD3:%.*]] = add i8 [[X:%.*]], -126
+; CHECK-NEXT: ret i8 [[ADD3]]
+;
+  %add = add nsw i8 %x, 127
+  %add3 = add nsw i8 %add, 3
+  ret i8 %add3
+}
+
+define i8 @nopreserve2(i8 %x) {
+; CHECK-LABEL: @nopreserve2(
+; CHECK-NEXT: [[ADD3:%.*]] = add i8 [[X:%.*]], 3
+; CHECK-NEXT: ret i8 [[ADD3]]
+;
+  %add = add i8 %x, 1
+  %add3 = add nsw i8 %add, 2
+  ret i8 %add3
+}
+
+define i8 @nopreserve3(i8 %A, i8 %B) {
+; CHECK-LABEL: @nopreserve3(
+; CHECK-NEXT: [[Y:%.*]] = add i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y]], 20
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %x = add i8 %A, 10
+  %y = add i8 %B, 10
+  %add = add nsw i8 %x, %y
+  ret i8 %add
+}
+
+define i8 @nopreserve4(i8 %A, i8 %B) {
+; CHECK-LABEL: @nopreserve4(
+; CHECK-NEXT: [[Y:%.*]] = add i8 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[Y]], 20
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %x = add nsw i8 %A, 10
+  %y = add nsw i8 %B, 10
+  %add = add nsw i8 %x, %y
+  ret i8 %add
+}
+
+define <3 x i32> @shl_nuw_nsw_shuffle_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @shl_nuw_nsw_shuffle_splat_vec(
+; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32>
+; CHECK-NEXT: [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]],
+; CHECK-NEXT: ret <3 x i32> [[T3]]
+;
+  %t2 = zext <2 x i8> %x to <2 x i32>
+  %shuf = shufflevector <2 x i32> %t2, <2 x i32> poison, <3 x i32>
+  %t3 = shl <3 x i32> %shuf,
+  ret <3 x i32> %t3
+}
+
+; Negative test - if the shuffle mask contains an undef, we bail out to
+; avoid propagating information that may not be used consistently by callers.
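+; (A sketch of the concern, added for clarity rather than taken from the
+; original test: with a splat mask such as <0, 0, undef>, lanes 0 and 1 are
+; known copies of the zext'd element, but the undef lane has no defined
+; source, so nuw/nsw facts proven for the defined lanes cannot be attached
+; to the whole vector shl.)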
+
+define <3 x i32> @shl_nuw_nsw_shuffle_undef_elt_splat_vec(<2 x i8> %x) {
+; CHECK-LABEL: @shl_nuw_nsw_shuffle_undef_elt_splat_vec(
+; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
+; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32>
+; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[SHUF]],
+; CHECK-NEXT: ret <3 x i32> [[T3]]
+;
+  %t2 = zext <2 x i8> %x to <2 x i32>
+  %shuf = shufflevector <2 x i32> %t2, <2 x i32> poison, <3 x i32>
+  %t3 = shl <3 x i32> %shuf,
+  ret <3 x i32> %t3
+}
+
+; Make sure we don't crash on a ConstantExpr shufflevector
+define @mul_nuw_nsw_shuffle_constant_expr( %z) {
+; CHECK-LABEL: @mul_nuw_nsw_shuffle_constant_expr(
+; CHECK-NEXT: [[XX:%.*]] = zext [[Z:%.*]] to
+; CHECK-NEXT: [[T3:%.*]] = mul [[XX]], shufflevector ( insertelement ( undef, i64 3, i32 0), zeroinitializer, zeroinitializer)
+; CHECK-NEXT: ret [[T3]]
+;
+  %xx = zext %z to
+  %shuf = shufflevector insertelement ( undef, i64 3, i32 0), zeroinitializer, zeroinitializer
+  %t3 = mul %shuf, %xx
+  ret %t3
+}
diff --git a/llvm/test/Transforms/InstCombine/obfuscated_splat-inseltpoison.ll b/llvm/test/Transforms/InstCombine/obfuscated_splat-inseltpoison.ll
new file mode 100644
index 0000000..cb87540
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/obfuscated_splat-inseltpoison.ll
@@ -0,0 +1,11 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define void @test(<4 x float> *%in_ptr, <4 x float> *%out_ptr) {
+  %A = load <4 x float>, <4 x float>* %in_ptr, align 16
+  %B = shufflevector <4 x float> %A, <4 x float> poison, <4 x i32>
+  %C = shufflevector <4 x float> %B, <4 x float> %A, <4 x i32>
+  %D = shufflevector <4 x float> %C, <4 x float> %A, <4 x i32>
+; CHECK: %D = shufflevector <4 x float> %A, <4 x float> undef, <4 x i32> zeroinitializer
+  store <4 x float> %D, <4 x float> *%out_ptr
+  ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/pr2645-0-inseltpoison.ll b/llvm/test/Transforms/InstCombine/pr2645-0-inseltpoison.ll
new file mode 100644
index 0000000..a32a1ae
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr2645-0-inseltpoison.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | grep "insertelement <4 x float> poison"
+
+; Instcombine should be able to prove that none of the
+; insertelement's first operand's elements are needed.
+
+define internal void @""(i8*) {
+;