From 86ff9d313a8521dd715fa9a8c1e7c5f09589f6b1 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 25 Apr 2019 12:45:11 +0000
Subject: [PATCH] [InstCombine][X86] Add PACKSS/PACKUS tests for truncation where saturation won't occur

llvm-svn: 359185
---
Review notes (text between the "---" marker and the diffstat is not part of
the commit message):

 * trunc_packusdw_128/_256/_512 now call the PACKUSDW intrinsics
   (sse41.packusdw, avx2.packusdw, avx512.packusdw.512). They previously
   called the PACKSSDW intrinsics, so the unsigned dword pack had no
   coverage, unlike the trunc_packuswb_* tests which do call packuswb.
   NOTE(review): this assumes those three intrinsics are declared further
   down x86-pack.ll, outside the context shown in this hunk - confirm.
 * NOTE(review): in this copy every ashr/lshr/and line ends right after the
   comma; the constant vector operand is missing and must be restored
   before the test file will parse.
 * The blob hashes on the "index" line predate the packusdw change.

 llvm/test/Transforms/InstCombine/X86/x86-pack.ll | 160 +++++++++++++++++++++++
 1 file changed, 160 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pack.ll b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll
index f3c41a8..f61cc3a 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pack.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll
@@ -350,6 +350,166 @@ define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
   ret <64 x i8> %4
 }
 
+;
+; Truncation (without Saturation)
+;
+
+define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @trunc_packssdw_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+;
+  %1 = ashr <4 x i32> %a0,
+  %2 = and <4 x i32> %a1,
+  %3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
+  ret <8 x i16> %3
+}
+
+define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: @trunc_packusdw_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+;
+  %1 = lshr <4 x i32> %a0,
+  %2 = and <4 x i32> %a1,
+  %3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2)
+  ret <8 x i16> %3
+}
+
+define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: @trunc_packsswb_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
+;
+  %1 = ashr <8 x i16> %a0,
+  %2 = and <8 x i16> %a1,
+  %3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
+  ret <16 x i8> %3
+}
+
+define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: @trunc_packuswb_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
+;
+  %1 = lshr <8 x i16> %a0,
+  %2 = and <8 x i16> %a1,
+  %3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
+  ret <16 x i8> %3
+}
+
+define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @trunc_packssdw_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
+;
+  %1 = ashr <8 x i32> %a0,
+  %2 = ashr <8 x i32> %a1,
+  %3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
+  ret <16 x i16> %3
+}
+
+define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @trunc_packusdw_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
+;
+  %1 = lshr <8 x i32> %a0,
+  %2 = and <8 x i32> %a1,
+  %3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2)
+  ret <16 x i16> %3
+}
+
+define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK-LABEL: @trunc_packsswb_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
+; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
+;
+  %1 = ashr <16 x i16> %a0,
+  %2 = and <16 x i16> %a1,
+  %3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
+  ret <32 x i8> %3
+}
+
+define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK-LABEL: @trunc_packuswb_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
+; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
+;
+  %1 = lshr <16 x i16> %a0,
+  %2 = and <16 x i16> %a1,
+  %3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
+  ret <32 x i8> %3
+}
+
+define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @trunc_packssdw_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
+;
+  %1 = ashr <16 x i32> %a0,
+  %2 = ashr <16 x i32> %a1,
+  %3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
+  ret <32 x i16> %3
+}
+
+define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @trunc_packusdw_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
+;
+  %1 = lshr <16 x i32> %a0,
+  %2 = and <16 x i32> %a1,
+  %3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2)
+  ret <32 x i16> %3
+}
+
+define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
+; CHECK-LABEL: @trunc_packsswb_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
+; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
+;
+  %1 = ashr <32 x i16> %a0,
+  %2 = and <32 x i16> %a1,
+  %3 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %1, <32 x i16> %2)
+  ret <64 x i8> %3
+}
+
+define <64 x i8> @trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
+; CHECK-LABEL: @trunc_packuswb_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]],
+; CHECK-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
+; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
+;
+  %1 = lshr <32 x i16> %a0,
+  %2 = and <32 x i16> %a1,
+  %3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2)
+  ret <64 x i8> %3
+}
+
 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
-- 
2.7.4