From 9961c55e28e50c5a5ef757865d49c514754bbad7 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sun, 13 Jan 2019 21:21:46 +0000
Subject: [PATCH] [X86] Add some basic add/sub saturation combine tests.

The actual combines will be added in a future commit.

llvm-svn: 351023
---
 llvm/test/CodeGen/X86/combine-add-ssat.ll | 114 ++++++++++++++++++++++++++++++
 llvm/test/CodeGen/X86/combine-add-usat.ll | 102 ++++++++++++++++++++++++++
 llvm/test/CodeGen/X86/combine-sub-ssat.ll |  44 ++++++++++++
 llvm/test/CodeGen/X86/combine-sub-usat.ll |  40 +++++++++++
 4 files changed, 300 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/combine-add-ssat.ll
 create mode 100644 llvm/test/CodeGen/X86/combine-add-usat.ll
 create mode 100644 llvm/test/CodeGen/X86/combine-sub-ssat.ll
 create mode 100644 llvm/test/CodeGen/X86/combine-sub-usat.ll

diff --git a/llvm/test/CodeGen/X86/combine-add-ssat.ll b/llvm/test/CodeGen/X86/combine-add-ssat.ll
new file mode 100644
index 0000000..1da3229
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-add-ssat.ll
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512BW
+
+declare i32 @llvm.sadd.sat.i32 (i32, i32)
+declare i64 @llvm.sadd.sat.i64 (i64, i64)
+declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
+
+; fold (sadd_sat c, x) -> (sadd_sat x, c)
+define i32 @combine_constant_i32(i32 %a0) {
+; CHECK-LABEL: combine_constant_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: addl $1, %ecx
+; CHECK-NEXT: setns %al
+; CHECK-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
+; CHECK-NEXT: addl $1, %edi
+; CHECK-NEXT: cmovnol %edi, %eax
+; CHECK-NEXT: retq
+ %res = call i32 @llvm.sadd.sat.i32(i32 1, i32 %a0);
+ ret i32 %res;
+}
+
+define <8 x i16> @combine_constant_v8i16(<8 x i16> %a0) {
+; SSE-LABEL: combine_constant_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: paddsw {{.*}}(%rip), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_constant_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpaddsw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0);
+ ret <8 x i16> %res;
+}
+
+; fold (sadd_sat x, 0) -> x
+define i32 @combine_zero_i32(i32 %a0) {
+; CHECK-LABEL: combine_zero_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: addl $0, %ecx
+; CHECK-NEXT: setns %al
+; CHECK-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
+; CHECK-NEXT: addl $0, %edi
+; CHECK-NEXT: cmovnol %edi, %eax
+; CHECK-NEXT: retq
+ %1 = call i32 @llvm.sadd.sat.i32(i32 %a0, i32 0);
+ ret i32 %1
+}
+
+define <8 x i16> @combine_zero_v8i16(<8 x i16> %a0) {
+; SSE-LABEL: combine_zero_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: paddsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_zero_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a0, <8 x i16> zeroinitializer);
+ ret <8 x i16> %1
+}
+
+; fold (sadd_sat x, y) -> (add x, y) iff no overflow
+define i32 @combine_no_overflow_i32(i32 %a0, i32 %a1) {
+; CHECK-LABEL: combine_no_overflow_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sarl $16, %edi
+; CHECK-NEXT: shrl $16, %esi
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: addl %esi, %ecx
+; CHECK-NEXT: setns %al
+; CHECK-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
+; CHECK-NEXT: addl %edi, %esi
+; CHECK-NEXT: cmovnol %esi, %eax
+; CHECK-NEXT: retq
+ %1 = ashr i32 %a0, 16
+ %2 = lshr i32 %a1, 16
+ %3 = call i32 @llvm.sadd.sat.i32(i32 %1, i32 %2);
+ ret i32 %3
+}
+
+define <8 x i16> @combine_no_overflow_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
+; SSE-LABEL: combine_no_overflow_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: psraw $10, %xmm0
+; SSE-NEXT: psrlw $10, %xmm1
+; SSE-NEXT: paddsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_no_overflow_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsraw $10, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $10, %xmm1, %xmm1
+; AVX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = ashr <8 x i16> %a0, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+ %2 = lshr <8 x i16> %a1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+ %3 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %1, <8 x i16> %2);
+ ret <8 x i16> %3
+}

diff --git a/llvm/test/CodeGen/X86/combine-add-usat.ll b/llvm/test/CodeGen/X86/combine-add-usat.ll
new file mode 100644
index 0000000..531f744
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-add-usat.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512BW
+
+declare i32 @llvm.uadd.sat.i32 (i32, i32)
+declare i64 @llvm.uadd.sat.i64 (i64, i64)
+declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
+
+; fold (uadd_sat c, x) -> (uadd_sat x, c)
+define i32 @combine_constant_i32(i32 %a0) {
+; CHECK-LABEL: combine_constant_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addl $1, %edi
+; CHECK-NEXT: movl $-1, %eax
+; CHECK-NEXT: cmovael %edi, %eax
+; CHECK-NEXT: retq
+ %1 = call i32 @llvm.uadd.sat.i32(i32 1, i32 %a0);
+ ret i32 %1
+}
+
+define <8 x i16> @combine_constant_v8i16(<8 x i16> %a0) {
+; SSE-LABEL: combine_constant_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: paddusw {{.*}}(%rip), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_constant_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpaddusw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0);
+ ret <8 x i16> %1
+}
+
+; fold (uadd_sat x, 0) -> x
+define i32 @combine_zero_i32(i32 %a0) {
+; CHECK-LABEL: combine_zero_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addl $0, %edi
+; CHECK-NEXT: movl $-1, %eax
+; CHECK-NEXT: cmovael %edi, %eax
+; CHECK-NEXT: retq
+ %1 = call i32 @llvm.uadd.sat.i32(i32 %a0, i32 0);
+ ret i32 %1
+}
+
+define <8 x i16> @combine_zero_v8i16(<8 x i16> %a0) {
+; SSE-LABEL: combine_zero_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: paddusw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_zero_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a0, <8 x i16> zeroinitializer);
+ ret <8 x i16> %1
+}
+
+; fold (uadd_sat x, y) -> (add x, y) iff no overflow
+define i32 @combine_no_overflow_i32(i32 %a0, i32 %a1) {
+; CHECK-LABEL: combine_no_overflow_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: shrl $16, %edi
+; CHECK-NEXT: shrl $16, %esi
+; CHECK-NEXT: addl %edi, %esi
+; CHECK-NEXT: movl $-1, %eax
+; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: retq
+ %1 = lshr i32 %a0, 16
+ %2 = lshr i32 %a1, 16
+ %3 = call i32 @llvm.uadd.sat.i32(i32 %1, i32 %2);
+ ret i32 %3
+}
+
+define <8 x i16> @combine_no_overflow_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
+; SSE-LABEL: combine_no_overflow_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: psrlw $10, %xmm0
+; SSE-NEXT: psrlw $10, %xmm1
+; SSE-NEXT: paddusw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_no_overflow_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrlw $10, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $10, %xmm1, %xmm1
+; AVX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = lshr <8 x i16> %a0, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+ %2 = lshr <8 x i16> %a1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+ %3 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %1, <8 x i16> %2);
+ ret <8 x i16> %3
+}

diff --git a/llvm/test/CodeGen/X86/combine-sub-ssat.ll b/llvm/test/CodeGen/X86/combine-sub-ssat.ll
new file mode 100644
index 0000000..8d53300
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-sub-ssat.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512BW
+
+declare i32 @llvm.ssub.sat.i32 (i32, i32)
+declare i64 @llvm.ssub.sat.i64 (i64, i64)
+declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
+
+; fold (ssub_sat x, 0) -> x
+define i32 @combine_zero_i32(i32 %a0) {
+; CHECK-LABEL: combine_zero_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: subl $0, %ecx
+; CHECK-NEXT: setns %al
+; CHECK-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
+; CHECK-NEXT: subl $0, %edi
+; CHECK-NEXT: cmovnol %edi, %eax
+; CHECK-NEXT: retq
+ %1 = call i32 @llvm.ssub.sat.i32(i32 %a0, i32 0);
+ ret i32 %1
+}
+
+define <8 x i16> @combine_zero_v8i16(<8 x i16> %a0) {
+; SSE-LABEL: combine_zero_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: psubsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_zero_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a0, <8 x i16> zeroinitializer);
+ ret <8 x i16> %1
+}

diff --git a/llvm/test/CodeGen/X86/combine-sub-usat.ll b/llvm/test/CodeGen/X86/combine-sub-usat.ll
new file mode 100644
index 0000000..0568b28
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-sub-usat.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512BW
+
+declare i32 @llvm.usub.sat.i32 (i32, i32)
+declare i64 @llvm.usub.sat.i64 (i64, i64)
+declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
+
+; fold (usub_sat x, 0) -> x
+define i32 @combine_zero_i32(i32 %a0) {
+; CHECK-LABEL: combine_zero_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: subl $0, %edi
+; CHECK-NEXT: cmovael %edi, %eax
+; CHECK-NEXT: retq
+ %1 = call i32 @llvm.usub.sat.i32(i32 %a0, i32 0);
+ ret i32 %1
+}
+
+define <8 x i16> @combine_zero_v8i16(<8 x i16> %a0) {
+; SSE-LABEL: combine_zero_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: psubusw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: combine_zero_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a0, <8 x i16> zeroinitializer);
+ ret <8 x i16> %1
+}
-- 
2.7.4
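
Note (appended after the patch trailer, not part of the original commit): the
message above defers the actual combines to a future change. As a rough
illustration only, the folds these tests anticipate could be implemented along
the lines of the minimal DAGCombiner-style sketch below. The helper name
combineAddSat and its wiring into the combiner are assumptions made here for
illustration; only generic SelectionDAG queries are used.

    // Sketch of the saturation-add folds exercised by the tests above:
    //   (sadd_sat/uadd_sat c, x) -> (sadd_sat/uadd_sat x, c)
    //   (sadd_sat/uadd_sat x, 0) -> x
    //   (uadd_sat x, y)          -> (add x, y) iff the add cannot overflow
    #include "llvm/CodeGen/SelectionDAG.h"
    #include "llvm/CodeGen/SelectionDAGNodes.h"
    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    static SDValue combineAddSat(SDNode *N, SelectionDAG &DAG) {
      SDValue N0 = N->getOperand(0);
      SDValue N1 = N->getOperand(1);
      EVT VT = N0.getValueType();
      SDLoc DL(N);
      unsigned Opcode = N->getOpcode();
      assert((Opcode == ISD::SADDSAT || Opcode == ISD::UADDSAT) &&
             "expected a saturating add");

      // Saturating adds are commutative, so canonicalize a constant LHS
      // (scalar or build_vector of constants) to the RHS; later folds then
      // only need to inspect operand 1.
      if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
          !DAG.isConstantIntBuildVectorOrConstantInt(N1))
        return DAG.getNode(Opcode, DL, VT, N1, N0);

      // Adding zero can never saturate: (Xadd_sat x, 0) -> x.
      if (isNullOrNullSplat(N1))
        return N0;

      // If the sign bit of both operands is known zero, an unsigned add of
      // two values below 2^(w-1) stays below 2^w, so it cannot wrap and the
      // saturating add is a plain add.
      if (Opcode == ISD::UADDSAT) {
        KnownBits K0 = DAG.computeKnownBits(N0);
        KnownBits K1 = DAG.computeKnownBits(N1);
        if (K0.isNonNegative() && K1.isNonNegative())
          return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
      }

      // A matching combineSubSat helper would handle the sub tests:
      // (ssub_sat/usub_sat x, 0) -> x via the same isNullOrNullSplat check.
      return SDValue();
    }

The known-bits condition mirrors how the combine_no_overflow_* tests are set
up: shifting both inputs right by 10 (or 16) clears enough high bits that the
saturating add provably never clamps, which is what would let the paddsw and
paddusw sequences above relax to plain padds.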