From 90dc78bc62784faaa55afb0320cf3c2187d80ac6 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham@arm.com>
Date: Mon, 17 Feb 2020 17:03:52 +0000
Subject: [PATCH] [ARM,MVE] Add intrinsics for abs, neg and not operations.

Summary:
This commit adds the unpredicated intrinsics for the unary operations
vabsq (absolute value), vnegq (arithmetic negation), vmvnq (bitwise
complement), vqabsq and vqnegq (saturating versions of abs and neg for
signed integers, in the sense that they give INT_MAX if an input lane
is INT_MIN).

This is done entirely in clang: all of these operations have existing
isel patterns and existing tests for them on the LLVM side, so I've
just made clang emit the same IR that those patterns already match.

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: MarkMurrayARM

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D74331
---
 clang/include/clang/Basic/arm_mve.td           |  27 ++
 clang/include/clang/Basic/arm_mve_defs.td      |   4 +
 clang/lib/CodeGen/CGBuiltin.cpp                |  13 +
 clang/test/CodeGen/arm-mve-intrinsics/absneg.c | 338 +++++++++++++++++++++++++
 4 files changed, 382 insertions(+)
 create mode 100644 clang/test/CodeGen/arm-mve-intrinsics/absneg.c

diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index 5cd88b0..dfc0ee8 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -234,6 +234,33 @@ let params = T.Unsigned in {
   defm vdwdup: vxdup_mc<(? u32:$limit, imm_1248:$step), (? $limit, $step)>;
 }
 
+let params = T.Int in {
+def vmvnq: Intrinsic<Vector, (args Vector:$a), (xor $a, (uint_max Vector))>;
+}
+let params = T.Signed in {
+def vnegq: Intrinsic<Vector, (args Vector:$a),
+                     (sub (zeroinit Vector), $a)>;
+def vabsq: Intrinsic<Vector, (args Vector:$a),
+                     (select (icmp_slt $a, (zeroinit Vector)),
+                             (sub (zeroinit Vector), $a), $a)>;
+def vqnegq: Intrinsic<Vector, (args Vector:$a),
+                      (select (icmp_eq $a, (int_min Vector)),
+                              (int_max Vector),
+                              (sub (zeroinit Vector), $a))>;
+def vqabsq: Intrinsic<Vector, (args Vector:$a),
+                      (select (icmp_sgt $a, (zeroinit Vector)), $a,
+                              (select (icmp_eq $a, (int_min Vector)),
+                                      (int_max Vector),
+                                      (sub (zeroinit Vector), $a)))>;
+}
+let params = T.Float in {
+def vnegq_f: Intrinsic<Vector, (args Vector:$a), (fneg $a)>,
+             NameOverride<"vnegq">;
+def vabsq_f: Intrinsic<Vector, (args Vector:$a),
+                       (IRIntBase<"fabs", [Vector]> $a)>,
+             NameOverride<"vabsq">;
+}
+
 // The bitcasting below is not overcomplicating the IR because while
 // Vector and UVector may be different vector types at the C level i.e.
 // vectors of same size signed/unsigned ints. Once they're lowered
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index d4e8215..2d080f2 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -98,6 +98,9 @@ def extend: CGHelperFn<"SignOrZeroExtend"> {
   let special_params = [IRBuilderIntParam<2, "bool">];
 }
 def zeroinit: IRFunction<"llvm::Constant::getNullValue">;
+def int_min: CGHelperFn<"ARMMVEConstantSplat<1,0>">;
+def int_max: CGHelperFn<"ARMMVEConstantSplat<0,1>">;
+def uint_max: CGHelperFn<"ARMMVEConstantSplat<1,1>">;
 def undef: IRFunction<"UndefValue::get">;
 def icmp_eq: IRBuilder<"CreateICmpEQ">;
 def icmp_ne: IRBuilder<"CreateICmpNE">;
@@ -117,6 +120,7 @@ def fcmp_lt: IRBuilder<"CreateFCmpOLT">;
 def fcmp_le: IRBuilder<"CreateFCmpOLE">;
 def splat: CGHelperFn<"ARMMVEVectorSplat">;
 def select: IRBuilder<"CreateSelect">;
+def fneg: IRBuilder<"CreateFNeg">;
 
 // A node that makes an Address out of a pointer-typed Value, by
 // providing an alignment as the second argument.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5e411bc..0081740 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7056,6 +7056,19 @@ static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
   }
 }
 
+template <unsigned HighBit, unsigned OtherBits>
+static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
+  // MVE-specific helper function to make a vector splat of a constant such as
+  // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
+  llvm::Type *T = VT->getVectorElementType();
+  unsigned LaneBits = T->getPrimitiveSizeInBits();
+  uint32_t Value = HighBit << (LaneBits - 1);
+  if (OtherBits)
+    Value |= (1UL << (LaneBits - 1)) - 1;
+  llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
+  return ARMMVEVectorSplat(Builder, Lane);
+}
+
 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E,
                                               ReturnValueSlot ReturnValue,
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
new file mode 100644
index 0000000..db4253f
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
@@ -0,0 +1,338 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vabsq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vabsq_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vabsq(a);
+#else /* POLYMORPHIC */
+    return vabsq_f16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vabsq_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vabsq(a);
+#else /* POLYMORPHIC */
+    return vabsq_f32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <16 x i8> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = sub <16 x i8> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[A]]
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vabsq_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vabsq(a);
+#else /* POLYMORPHIC */
+    return vabsq_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <8 x i16> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = sub <8 x i16> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vabsq_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vabsq(a);
+#else /* POLYMORPHIC */
+    return vabsq_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vabsq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vabsq_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vabsq(a);
+#else /* POLYMORPHIC */
+    return vabsq_s32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vmvnq_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vmvnq(a);
+#else /* POLYMORPHIC */
+    return vmvnq_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vmvnq_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vmvnq(a);
+#else /* POLYMORPHIC */
+    return vmvnq_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vmvnq_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vmvnq(a);
+#else /* POLYMORPHIC */
+    return vmvnq_s32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vmvnq_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vmvnq(a);
+#else /* POLYMORPHIC */
+    return vmvnq_u8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vmvnq_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vmvnq(a);
+#else /* POLYMORPHIC */
+    return vmvnq_u16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vmvnq_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vmvnq(a);
+#else /* POLYMORPHIC */
+    return vmvnq_u32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fneg <8 x half> [[A:%.*]]
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vnegq_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vnegq(a);
+#else /* POLYMORPHIC */
+    return vnegq_f16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fneg <4 x float> [[A:%.*]]
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vnegq_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vnegq(a);
+#else /* POLYMORPHIC */
+    return vnegq_f32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = sub <16 x i8> zeroinitializer, [[A:%.*]]
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vnegq_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vnegq(a);
+#else /* POLYMORPHIC */
+    return vnegq_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = sub <8 x i16> zeroinitializer, [[A:%.*]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vnegq_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vnegq(a);
+#else /* POLYMORPHIC */
+    return vnegq_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vnegq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vnegq_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vnegq(a);
+#else /* POLYMORPHIC */
+    return vnegq_s32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqabsq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <16 x i8> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <16 x i8> [[A]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+// CHECK-NEXT:    [[TMP2:%.*]] = sub <16 x i8> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, <16 x i8> [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[A]], <16 x i8> [[TMP3]]
+// CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+//
+int8x16_t test_vqabsq_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vqabsq(a);
+#else /* POLYMORPHIC */
+    return vqabsq_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqabsq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> [[A]], <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+// CHECK-NEXT:    [[TMP2:%.*]] = sub <8 x i16> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>, <8 x i16> [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[A]], <8 x i16> [[TMP3]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP4]]
+//
+int16x8_t test_vqabsq_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vqabsq(a);
+#else /* POLYMORPHIC */
+    return vqabsq_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqabsq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <4 x i32> [[A:%.*]], zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[A]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+// CHECK-NEXT:    [[TMP2:%.*]] = sub <4 x i32> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[TMP3]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+//
+int32x4_t test_vqabsq_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vqabsq(a);
+#else /* POLYMORPHIC */
+    return vqabsq_s32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqnegq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+// CHECK-NEXT:    [[TMP1:%.*]] = sub <16 x i8> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, <16 x i8> [[TMP1]]
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqnegq_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vqnegq(a);
+#else /* POLYMORPHIC */
+    return vqnegq_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqnegq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+// CHECK-NEXT:    [[TMP1:%.*]] = sub <8 x i16> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>, <8 x i16> [[TMP1]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqnegq_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vqnegq(a);
+#else /* POLYMORPHIC */
+    return vqnegq_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqnegq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+// CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[A]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> [[TMP1]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vqnegq_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vqnegq(a);
+#else /* POLYMORPHIC */
+    return vqnegq_s32(a);
+#endif /* POLYMORPHIC */
+}
+
-- 
2.7.4
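
A minimal usage sketch of the saturating operations described in the summary
(not part of the patch; it assumes an MVE-enabled target, the <arm_mve.h>
intrinsics exercised by the tests above, and the pre-existing vdupq_n_s32
splat intrinsic):

    #include <arm_mve.h>
    #include <stdint.h>

    /* vqabsq/vqnegq saturate: a lane holding INT32_MIN becomes INT32_MAX
     * instead of wrapping back to INT32_MIN as plain vabsq/vnegq would. */
    int32x4_t saturating_abs_demo(void)
    {
        int32x4_t v = vdupq_n_s32(INT32_MIN); /* every lane is INT32_MIN */
        return vqabsq_s32(v);                 /* every lane is INT32_MAX */
    }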
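
The ARMMVEConstantSplat<HighBit, OtherBits> helper added to CGBuiltin.cpp
builds each lane constant from two flags: the top bit and all bits below it.
A stand-alone C sketch of the same per-lane computation (illustrative only;
lane_constant is a hypothetical name, and the real helper then splats the
value across the vector):

    #include <stdint.h>
    #include <stdio.h>

    /* Same bit arithmetic as the helper:
     *   <1,0> -> INT_MIN, <0,1> -> INT_MAX, <1,1> -> UINT_MAX */
    static uint32_t lane_constant(unsigned HighBit, unsigned OtherBits,
                                  unsigned LaneBits)
    {
        uint32_t Value = (uint32_t)HighBit << (LaneBits - 1);
        if (OtherBits)
            Value |= (1UL << (LaneBits - 1)) - 1;
        return Value;
    }

    int main(void)
    {
        printf("%#x %#x %#x\n",
               lane_constant(1, 0, 16),  /* 0x8000: INT16_MIN  */
               lane_constant(0, 1, 16),  /* 0x7fff: INT16_MAX  */
               lane_constant(1, 1, 16)); /* 0xffff: UINT16_MAX */
        return 0;
    }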