From: Sanjay Patel Date: Tue, 25 Feb 2020 13:54:58 +0000 (-0500) Subject: [PhaseOrdering] add test for missing vector/CSE transforms (PR45015); NFC X-Git-Tag: llvmorg-12-init~13715 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f452f7b95a8c20f187f67dc57dd4778ba09090b3;p=platform%2Fupstream%2Fllvm.git [PhaseOrdering] add test for missing vector/CSE transforms (PR45015); NFC --- diff --git a/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll new file mode 100644 index 0000000..b8fac0f --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -O3 -S -mtriple=x86_64-- -mattr=avx | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; TODO: Ideally, this should reach the backend with 1 fsub, 1 fadd, and 1 shuffle. +; That may require some coordination between VectorCombine, SLP, and other passes. +; The end goal is to get a single "vaddsubps" instruction for x86 with AVX. + +define <4 x float> @PR45015(<4 x float> %arg, <4 x float> %arg1) { +; CHECK-LABEL: @PR45015( +; CHECK-NEXT: [[TMP1:%.*]] = fsub <4 x float> [[ARG:%.*]], [[ARG1:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[ARG]], [[ARG1]] +; CHECK-NEXT: [[T8:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub <4 x float> [[ARG]], [[ARG1]] +; CHECK-NEXT: [[T12:%.*]] = shufflevector <4 x float> [[T8]], <4 x float> [[TMP3]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[ARG]], [[ARG1]] +; CHECK-NEXT: [[T16:%.*]] = shufflevector <4 x float> [[T12]], <4 x float> [[TMP4]], <4 x i32> +; CHECK-NEXT: ret <4 x float> [[T16]] +; + %t = extractelement <4 x float> %arg, i32 0 + %t2 = extractelement <4 x float> %arg1, i32 0 + %t3 = fsub float %t, %t2 + %t4 = insertelement <4 x float> undef, float %t3, i32 0 + %t5 = extractelement <4 x float> %arg, i32 1 + %t6 = extractelement <4 x float> %arg1, i32 1 + %t7 = fadd float %t5, %t6 + %t8 = insertelement <4 x float> %t4, float %t7, i32 1 + %t9 = extractelement <4 x float> %arg, i32 2 + %t10 = extractelement <4 x float> %arg1, i32 2 + %t11 = fsub float %t9, %t10 + %t12 = insertelement <4 x float> %t8, float %t11, i32 2 + %t13 = extractelement <4 x float> %arg, i32 3 + %t14 = extractelement <4 x float> %arg1, i32 3 + %t15 = fadd float %t13, %t14 + %t16 = insertelement <4 x float> %t12, float %t15, i32 3 + ret <4 x float> %t16 +} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/lit.local.cfg b/llvm/test/Transforms/PhaseOrdering/X86/lit.local.cfg new file mode 100644 index 0000000..c8625f4 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'X86' in config.root.targets: + config.unsupported = True