From 173d4b84f61469c627e251753a034e7300faaabe Mon Sep 17 00:00:00 2001
From: Simon Moll
Date: Thu, 14 Jul 2022 12:36:22 +0200
Subject: [PATCH] [VP] Add test to show optimization opportunities

Add vp.add test cases that can be optimized with D92086 to show the
potential of generalized pattern rewriting.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D129746
---
 llvm/test/Transforms/InstSimplify/add_vp.ll | 91 +++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 llvm/test/Transforms/InstSimplify/add_vp.ll

diff --git a/llvm/test/Transforms/InstSimplify/add_vp.ll b/llvm/test/Transforms/InstSimplify/add_vp.ll
new file mode 100644
index 0000000..f8e6a4c
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/add_vp.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+declare <2 x i32> @llvm.vp.add.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+declare <2 x i32> @llvm.vp.sub.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+declare <2 x i8> @llvm.vp.add.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+declare <2 x i8> @llvm.vp.sub.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+; Constant folding should just work.
+define <2 x i32> @constant_vp_add(<2 x i1> %mask, i32 %evl) {
+; CHECK-LABEL: @constant_vp_add(
+; CHECK-NEXT: [[Q:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> <i32 3, i32 7>, <2 x i32> <i32 1, i32 2>, <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; CHECK-NEXT: ret <2 x i32> [[Q]]
+;
+ %Q = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> <i32 3, i32 7>, <2 x i32> <i32 1, i32 2>, <2 x i1> %mask, i32 %evl)
+ ret <2 x i32> %Q
+}
+
+; Simplifying pure VP intrinsic patterns.
+define <2 x i32> @common_sub_operand(<2 x i32> %X, <2 x i32> %Y, <2 x i1> %mask, i32 %evl) {
+; CHECK-LABEL: @common_sub_operand(
+; CHECK-NEXT: [[Z:%.*]] = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; CHECK-NEXT: [[Q:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> [[Z]], <2 x i32> [[Y]], <2 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT: ret <2 x i32> [[Q]]
+;
+ ; %Z = sub i32 %X, %Y, vp(%mask, %evl)
+ %Z = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %X, <2 x i32> %Y, <2 x i1> %mask, i32 %evl)
+ ; %Q = add i32 %Z, %Y, vp(%mask, %evl)
+ %Q = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %Z, <2 x i32> %Y, <2 x i1> %mask, i32 %evl)
+ ret <2 x i32> %Q
+}
+
+; Mixing regular SIMD with vp intrinsics (vp add match root).
+define <2 x i32> @common_sub_operand_vproot(<2 x i32> %X, <2 x i32> %Y, <2 x i1> %mask, i32 %evl) {
+; CHECK-LABEL: @common_sub_operand_vproot(
+; CHECK-NEXT: [[Z:%.*]] = sub <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[Q:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> [[Z]], <2 x i32> [[Y]], <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; CHECK-NEXT: ret <2 x i32> [[Q]]
+;
+ %Z = sub <2 x i32> %X, %Y
+ ; %Q = add i32 %Z, %Y, vp(%mask, %evl)
+ %Q = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %Z, <2 x i32> %Y, <2 x i1> %mask, i32 %evl)
+ ret <2 x i32> %Q
+}
+
+; Mixing regular SIMD with vp intrinsics (vp inside pattern, regular instruction root).
+define <2 x i32> @common_sub_operand_vpinner(<2 x i32> %X, <2 x i32> %Y, <2 x i1> %mask, i32 %evl) {
+; CHECK-LABEL: @common_sub_operand_vpinner(
+; CHECK-NEXT: [[Z:%.*]] = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; CHECK-NEXT: [[Q:%.*]] = add <2 x i32> [[Z]], [[Y]]
+; CHECK-NEXT: ret <2 x i32> [[Q]]
+;
+ ; %Z = sub i32 %X, %Y, vp(%mask, %evl)
+ %Z = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %X, <2 x i32> %Y, <2 x i1> %mask, i32 %evl)
+ %Q = add <2 x i32> %Z, %Y
+ ret <2 x i32> %Q
+}
+
+define <2 x i32> @negated_operand(<2 x i32> %x, <2 x i1> %mask, i32 %evl) {
+; CHECK-LABEL: @negated_operand(
+; CHECK-NEXT: [[NEGX:%.*]] = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> zeroinitializer, <2 x i32> [[X:%.*]], <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> [[NEGX]], <2 x i32> [[X]], <2 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ ; %negx = sub i32 0, %x, vp(%mask, %evl)
+ %negx = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> zeroinitializer, <2 x i32> %x, <2 x i1> %mask, i32 %evl)
+ ; %r = add i32 %negx, %x, vp(%mask, %evl)
+ %r = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %negx, <2 x i32> %x, <2 x i1> %mask, i32 %evl)
+ ret <2 x i32> %r
+}
+
+; TODO: Lift InstSimplify::SimplifyAdd to the trait framework to optimize this.
+define <2 x i8> @knownnegation(<2 x i8> %x, <2 x i8> %y, <2 x i1> %mask, i32 %evl) {
+; TODO-CHECK-LABEL: @knownnegation(
+; TODO-CHECK-NEXT: ret <2 x i8> zeroinitializer
+;
+; CHECK-LABEL: @knownnegation(
+; CHECK-NEXT: [[XY:%.*]] = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; CHECK-NEXT: [[YX:%.*]] = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> [[Y]], <2 x i8> [[X]], <2 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> [[XY]], <2 x i8> [[YX]], <2 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT: ret <2 x i8> [[R]]
+;
+ ; %xy = sub i8 %x, %y, vp(%mask, %evl)
+ %xy = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i1> %mask, i32 %evl)
+ ; %yx = sub i8 %y, %x, vp(%mask, %evl)
+ %yx = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %y, <2 x i8> %x, <2 x i1> %mask, i32 %evl)
+ ; %r = add i8 %xy, %yx, vp(%mask, %evl)
+ %r = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %xy, <2 x i8> %yx, <2 x i1> %mask, i32 %evl)
+ ret <2 x i8> %r
+}
-- 
2.7.4
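
Note: the fold the @common_sub_operand test is waiting for can be
prototyped with LLVM's stock PatternMatch helpers. The sketch below is
illustrative only: it is not the D92086 trait framework, and the helper
name simplifyVPAddOfVPSub is made up. It folds
vp.add(vp.sub(X, Y, M, EVL), Y, M, EVL) to X, which is a legal
refinement assuming the usual VP semantics where disabled lanes yield
poison, since the fold only replaces poison lanes with X's lane values.

  // Hypothetical one-off fold; D92086 instead generalizes the existing
  // plain-`add` simplifications so they also apply to vp.add.
  #include "llvm/IR/IntrinsicInst.h"
  #include "llvm/IR/PatternMatch.h"

  using namespace llvm;
  using namespace llvm::PatternMatch;

  // Returns X if VPAdd is vp.add(vp.sub(X, Y, Mask, EVL), Y, Mask, EVL),
  // i.e. (X - Y) + Y with matching predication; nullptr otherwise.
  static Value *simplifyVPAddOfVPSub(IntrinsicInst *VPAdd) {
    Value *X, *Y, *Mask, *EVL;
    if (match(VPAdd,
              m_Intrinsic<Intrinsic::vp_add>(
                  m_Intrinsic<Intrinsic::vp_sub>(m_Value(X), m_Value(Y),
                                                 m_Value(Mask), m_Value(EVL)),
                  m_Deferred(Y), m_Deferred(Mask), m_Deferred(EVL))))
      return X; // Every active lane computes (X - Y) + Y == X.
    return nullptr;
  }

A caller would try this on each intrinsic call with ID Intrinsic::vp_add
and replace all uses with the result, mirroring how InstSimplify
consumes its simplification helpers.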