[x86] add tests for psubus; NFC

author Sanjay Patel <spatel@rotateright.com>

Tue, 19 Oct 2021 17:58:06 +0000 (13:58 -0400)

committer Sanjay Patel <spatel@rotateright.com>

Tue, 19 Oct 2021 20:41:18 +0000 (16:41 -0400)
author Sanjay Patel <spatel@rotateright.com>
Tue, 19 Oct 2021 17:58:06 +0000 (13:58 -0400)
committer Sanjay Patel <spatel@rotateright.com>
Tue, 19 Oct 2021 20:41:18 +0000 (16:41 -0400)
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll

index d84c873..197e183 100644 (file)
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -9,6 +9,8 @@
  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX512
  
+declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
+
  define <8 x i16> @test1(<8 x i16> %x) nounwind {
  ; SSE-LABEL: test1:
  ; SSE:       # %bb.0: # %vector.ph
@@ -90,7 +92,7 @@ define <16 x i8> @ashr_xor_and_commute_uses(<16 x i8> %x, <16 x i8>* %p1, <16 x
    ret <16 x i8> %res
  }
  
-define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x, <4 x i32>* %p1, <4 x i32>* %p2) nounwind {
+define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x) nounwind {
  ; SSE-LABEL: ashr_xor_and_custom:
  ; SSE:       # %bb.0:
  ; SSE-NEXT:    movdqa %xmm0, %xmm1
@@ -125,6 +127,48 @@ define <4 x i32> @ashr_xor_and_custom(<4 x i32> %x, <4 x i32>* %p1, <4 x i32>* %
    ret <4 x i32> %res
  }
  
+define <4 x i32> @usubsat_custom(<4 x i32> %x) nounwind {
+; SSE2OR3-LABEL: usubsat_custom:
+; SSE2OR3:       # %bb.0:
+; SSE2OR3-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2OR3-NEXT:    pxor %xmm0, %xmm1
+; SSE2OR3-NEXT:    pxor %xmm2, %xmm2
+; SSE2OR3-NEXT:    pcmpgtd %xmm2, %xmm1
+; SSE2OR3-NEXT:    psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2OR3-NEXT:    pand %xmm1, %xmm0
+; SSE2OR3-NEXT:    retq
+;
+; SSE41-LABEL: usubsat_custom:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = <2147483648,2147483648,2147483648,u>
+; SSE41-NEXT:    pmaxud %xmm1, %xmm0
+; SSE41-NEXT:    psubd %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: usubsat_custom:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = <2147483648,2147483648,2147483648,u>
+; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: usubsat_custom:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: usubsat_custom:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    retq
+  %res = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 undef>)
+  ret <4 x i32> %res
+}
+
  define <8 x i16> @test2(<8 x i16> %x) nounwind {
  ; SSE-LABEL: test2:
  ; SSE:       # %bb.0: # %vector.ph
@@ -302,6 +346,47 @@ vector.ph:
    ret <16 x i16> %res
  }
  
+define <16 x i16> @ashr_xor_and_v16i16(<16 x i16> %x) nounwind {
+; SSE-LABEL: ashr_xor_and_v16i16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    psraw $15, %xmm2
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    psraw $15, %xmm3
+; SSE-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT:    pand %xmm3, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: ashr_xor_and_v16i16:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpsraw $15, %xmm2, %xmm2
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ashr_xor_and_v16i16:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm1
+; AVX2-NEXT:    vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: ashr_xor_and_v16i16:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsraw $15, %ymm0, %ymm1
+; AVX512-NEXT:    vpternlogq $72, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
+; AVX512-NEXT:    retq
+  %signsplat = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+  %flipsign = xor <16 x i16> %x, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
+  %res = and <16 x i16> %signsplat, %flipsign
+  ret <16 x i16> %res
+}
+
  define <16 x i16> @test8(<16 x i16> %x) nounwind {
  ; SSE-LABEL: test8:
  ; SSE:       # %bb.0: # %vector.ph
author	Sanjay Patel <spatel@rotateright.com>
	Tue, 19 Oct 2021 17:58:06 +0000 (13:58 -0400)
committer	Sanjay Patel <spatel@rotateright.com>
	Tue, 19 Oct 2021 20:41:18 +0000 (16:41 -0400)