From ddbdff3accb667860d4cbd436746ca28ea343b25 Mon Sep 17 00:00:00 2001 From: Francesco Petrogalli Date: Fri, 26 Jun 2020 20:06:06 +0000 Subject: [PATCH] [sve][acle] Recommit https://reviews.llvm.org/D82501 The original patch was reverted in https://github.com/llvm/llvm-project/commit/ff5ccf258e297df29f32d6b5e4fa0a7b95c44f9c as it was missing the C tests that got accidentally missing. This patch is a NFC of https://reviews.llvm.org/D82501, together with the SVE ACLE tests for the C intrinsics of svreinterpret for brain float types. --- .../acle_sve_reinterpret-bfloat.c | 177 +++++++++++++++++++++ clang/utils/TableGen/SveEmitter.cpp | 12 +- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 19 ++- llvm/test/CodeGen/AArch64/sve-bitcast.ll | 115 +++++++++++++ 4 files changed, 319 insertions(+), 4 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret-bfloat.c diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret-bfloat.c new file mode 100644 index 0000000..5cacc8a --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_reinterpret-bfloat.c @@ -0,0 +1,177 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t +// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t + +// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it. +// ASM-NOT: warning + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +svint8_t test_svreinterpret_s8_bf16(svbfloat16_t op) { + // CHECK-LABEL: test_svreinterpret_s8_bf16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s8, _bf16, , )(op); +} + +svint16_t test_svreinterpret_s16_bf16(svbfloat16_t op) { + // CHECK-LABEL: test_svreinterpret_s16_bf16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s16, _bf16, , )(op); +} + +svint32_t test_svreinterpret_s32_bf16(svbfloat16_t op) { + // CHECK-LABEL: test_svreinterpret_s32_bf16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s32, _bf16, , )(op); +} +svint64_t test_svreinterpret_s64_bf16(svbfloat16_t op) { + // CHECK-LABEL: test_svreinterpret_s64_bf16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_s64, _bf16, , )(op); +} + +svuint8_t test_svreinterpret_u8_bf16(svbfloat16_t op) { + // CHECK-LABEL: test_svreinterpret_u8_bf16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u8, _bf16, , )(op); +} + +svuint16_t test_svreinterpret_u16_bf16(svbfloat16_t op) { + // CHECK-LABEL: test_svreinterpret_u16_bf16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u16, _bf16, , )(op); +} + +svuint32_t test_svreinterpret_u32_bf16(svbfloat16_t op) { + // CHECK-LABEL: test_svreinterpret_u32_bf16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u32, _bf16, , )(op); +} + +svuint64_t test_svreinterpret_u64_bf16(svbfloat16_t op) { + // CHECK-LABEL: test_svreinterpret_u64_bf16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_u64, _bf16, , )(op); +} + +svbfloat16_t test_svreinterpret_bf16_s8(svint8_t op) { + // CHECK-LABEL: test_svreinterpret_bf16_s8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_bf16, _s8, , )(op); +} + +svbfloat16_t test_svreinterpret_bf16_s16(svint16_t op) { + // CHECK-LABEL: test_svreinterpret_bf16_s16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_bf16, _s16, , )(op); +} + +svbfloat16_t test_svreinterpret_bf16_s32(svint32_t op) { + // CHECK-LABEL: test_svreinterpret_bf16_s32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_bf16, _s32, , )(op); +} + +svbfloat16_t test_svreinterpret_bf16_s64(svint64_t op) { + // CHECK-LABEL: test_svreinterpret_bf16_s64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_bf16, _s64, , )(op); +} + +svbfloat16_t test_svreinterpret_bf16_u8(svuint8_t op) { + // CHECK-LABEL: test_svreinterpret_bf16_u8 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_bf16, _u8, , )(op); +} + +svbfloat16_t test_svreinterpret_bf16_u16(svuint16_t op) { + // CHECK-LABEL: test_svreinterpret_bf16_u16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_bf16, _u16, , )(op); +} + +svbfloat16_t test_svreinterpret_bf16_u32(svuint32_t op) { + // CHECK-LABEL: test_svreinterpret_bf16_u32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_bf16, _u32, , )(op); +} + +svbfloat16_t test_svreinterpret_bf16_u64(svuint64_t op) { + // CHECK-LABEL: test_svreinterpret_bf16_u64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_bf16, _u64, , )(op); +} + +svbfloat16_t test_svreinterpret_bf16_bf16(svbfloat16_t op) { + // CHECK-LABEL: test_svreinterpret_bf16_bf16 + // CHECK: ret %op + return SVE_ACLE_FUNC(svreinterpret_bf16, _bf16, , )(op); +} + +svbfloat16_t test_svreinterpret_bf16_f16(svfloat16_t op) { + // CHECK-LABEL: test_svreinterpret_bf16_f16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_bf16, _f16, , )(op); +} + +svbfloat16_t test_svreinterpret_bf16_f32(svfloat32_t op) { + // CHECK-LABEL: test_svreinterpret_bf16_f32 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_bf16, _f32, , )(op); +} + +svbfloat16_t test_svreinterpret_bf16_f64(svfloat64_t op) { + // CHECK-LABEL: test_svreinterpret_bf16_f64 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_bf16, _f64, , )(op); +} + +svfloat32_t test_svreinterpret_f32_bf16(svbfloat16_t op) { + // CHECK-LABEL: test_svreinterpret_f32_bf16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f32, _bf16, , )(op); +} + +svfloat16_t test_svreinterpret_f16_bf16(svbfloat16_t op) { + // CHECK-LABEL: test_svreinterpret_f16_bf16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f16, _bf16, , )(op); +} + +svfloat64_t test_svreinterpret_f64_bf16(svbfloat16_t op) { + // CHECK-LABEL: test_svreinterpret_f64_bf16 + // CHECK: %[[CAST:.*]] = bitcast %op to + // CHECK: ret %[[CAST]] + return SVE_ACLE_FUNC(svreinterpret_f64, _bf16, , )(op); +} diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 45db0a6..a02df1c 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -248,13 +248,13 @@ private: const char *Type; const char *BuiltinType; }; - SmallVector Reinterprets = { + SmallVector Reinterprets = { {"s8", "svint8_t", "q16Sc"}, {"s16", "svint16_t", "q8Ss"}, {"s32", "svint32_t", "q4Si"}, {"s64", "svint64_t", "q2SWi"}, {"u8", "svuint8_t", "q16Uc"}, {"u16", "svuint16_t", "q8Us"}, {"u32", "svuint32_t", "q4Ui"}, {"u64", "svuint64_t", "q2UWi"}, - {"f16", "svfloat16_t", "q8h"}, {"f32", "svfloat32_t", "q4f"}, - {"f64", "svfloat64_t", "q2d"}}; + {"f16", "svfloat16_t", "q8h"}, {"bf16", "svbfloat16_t", "q8y"}, + {"f32", "svfloat32_t", "q4f"}, {"f64", "svfloat64_t", "q2d"}}; RecordKeeper &Records; llvm::StringMap EltTypes; @@ -1208,6 +1208,10 @@ void SVEEmitter::createHeader(raw_ostream &OS) { for (auto ShortForm : { false, true } ) for (const ReinterpretTypeInfo &From : Reinterprets) for (const ReinterpretTypeInfo &To : Reinterprets) { + const bool IsBFloat = StringRef(From.Suffix).equals("bf16") || + StringRef(To.Suffix).equals("bf16"); + if (IsBFloat) + OS << "#if defined(__ARM_FEATURE_SVE_BF16)\n"; if (ShortForm) { OS << "__aio " << From.Type << " svreinterpret_" << From.Suffix; OS << "(" << To.Type << " op) {\n"; @@ -1218,6 +1222,8 @@ void SVEEmitter::createHeader(raw_ostream &OS) { OS << "#define svreinterpret_" << From.Suffix << "_" << To.Suffix << "(...) __builtin_sve_reinterpret_" << From.Suffix << "_" << To.Suffix << "(__VA_ARGS__)\n"; + if (IsBFloat) + OS << "#endif /* #if defined(__ARM_FEATURE_SVE_BF16) */\n"; } SmallVector, 128> Defs; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index bfffbe4..7b807fd 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1466,7 +1466,6 @@ multiclass sve_prefetch; def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>; - def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>; def : Pat<(nxv8f16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8f16 ZPR:$src)>; def : Pat<(nxv8f16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8f16 ZPR:$src)>; def : Pat<(nxv8f16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8f16 ZPR:$src)>; @@ -1487,6 +1486,24 @@ multiclass sve_prefetch; } + let Predicates = [IsLE, HasSVE, HasBF16] in { + def : Pat<(nxv8bf16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8bf16 ZPR:$src)>; + def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>; + def : Pat<(nxv8bf16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8bf16 ZPR:$src)>; + def : Pat<(nxv8bf16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8bf16 ZPR:$src)>; + def : Pat<(nxv8bf16 (bitconvert (nxv8f16 ZPR:$src))), (nxv8bf16 ZPR:$src)>; + def : Pat<(nxv8bf16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8bf16 ZPR:$src)>; + def : Pat<(nxv8bf16 (bitconvert (nxv2f64 ZPR:$src))), (nxv8bf16 ZPR:$src)>; + + def : Pat<(nxv16i8 (bitconvert (nxv8bf16 ZPR:$src))), (nxv16i8 ZPR:$src)>; + def : Pat<(nxv8i16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8i16 ZPR:$src)>; + def : Pat<(nxv4i32 (bitconvert (nxv8bf16 ZPR:$src))), (nxv4i32 ZPR:$src)>; + def : Pat<(nxv2i64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2i64 ZPR:$src)>; + def : Pat<(nxv8f16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8f16 ZPR:$src)>; + def : Pat<(nxv4f32 (bitconvert (nxv8bf16 ZPR:$src))), (nxv4f32 ZPR:$src)>; + def : Pat<(nxv2f64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2f64 ZPR:$src)>; + } + def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv16i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; diff --git a/llvm/test/CodeGen/AArch64/sve-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-bitcast.ll index ab70dea..670c986 100644 --- a/llvm/test/CodeGen/AArch64/sve-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/sve-bitcast.ll @@ -340,3 +340,118 @@ define @bitcast_float_to_double( %v) { %bc = bitcast %v to ret %bc } + +define @bitcast_bfloat_to_i8( %v) #0 { +; CHECK-LABEL: bitcast_bfloat_to_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_bfloat_to_i16( %v) #0 { +; CHECK-LABEL: bitcast_bfloat_to_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_bfloat_to_i32( %v) #0 { +; CHECK-LABEL: bitcast_bfloat_to_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_bfloat_to_i64( %v) #0 { +; CHECK-LABEL: bitcast_bfloat_to_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_bfloat_to_half( %v) #0 { +; CHECK-LABEL: bitcast_bfloat_to_half: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_bfloat_to_float( %v) #0 { +; CHECK-LABEL: bitcast_bfloat_to_float: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_bfloat_to_double( %v) #0 { +; CHECK-LABEL: bitcast_bfloat_to_double: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i8_to_bfloat( %v) #0 { +; CHECK-LABEL: bitcast_i8_to_bfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i16_to_bfloat( %v) #0 { +; CHECK-LABEL: bitcast_i16_to_bfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i32_to_bfloat( %v) #0 { +; CHECK-LABEL: bitcast_i32_to_bfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_i64_to_bfloat( %v) #0 { +; CHECK-LABEL: bitcast_i64_to_bfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_half_to_bfloat( %v) #0 { +; CHECK-LABEL: bitcast_half_to_bfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_float_to_bfloat( %v) #0 { +; CHECK-LABEL: bitcast_float_to_bfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +define @bitcast_double_to_bfloat( %v) #0 { +; CHECK-LABEL: bitcast_double_to_bfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: ret + %bc = bitcast %v to + ret %bc +} + +; +bf16 is required for the bfloat version. +attributes #0 = { "target-features"="+sve,+bf16" } -- 2.7.4