From f5d781d6273cc56dd8b44ee9e4cfb2ae5579bb04 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Mon, 27 Jun 2022 21:02:57 +0800 Subject: [PATCH] [X86] Support `_Float16` on SSE2 and up This is split from D113107 to address #56204 and https://discourse.llvm.org/t/how-to-build-compiler-rt-for-new-x86-half-float-abi/63366 Reviewed By: zahiraam, rjmccall, bkramer Differential Revision: https://reviews.llvm.org/D128571 --- clang/docs/LanguageExtensions.rst | 8 +++++- clang/docs/ReleaseNotes.rst | 3 +++ clang/lib/Basic/Targets/X86.cpp | 4 ++- clang/test/CodeGen/X86/Float16-arithmetic.c | 29 ++++++++++++++++++++++ .../{avx512fp16-complex.c => Float16-complex.c} | 1 + clang/test/Sema/Float16.c | 5 ++-- clang/test/Sema/conversion-target-dep.c | 2 +- clang/test/SemaCXX/Float16.cpp | 4 ++- 8 files changed, 50 insertions(+), 6 deletions(-) create mode 100644 clang/test/CodeGen/X86/Float16-arithmetic.c rename clang/test/CodeGen/X86/{avx512fp16-complex.c => Float16-complex.c} (96%) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index af697fa..1bac2ae 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -743,7 +743,13 @@ targets pending ABI standardization: * 64-bit ARM (AArch64) * AMDGPU * SPIR -* X86 (Only available under feature AVX512-FP16) +* X86 (see below) + +On X86 targets, ``_Float16`` is supported as long as SSE2 is available, which +includes all 64-bit and all recent 32-bit processors. When the target supports +AVX512-FP16, ``_Float16`` arithmetic is performed using that native support. +Otherwise, ``_Float16`` arithmetic is performed by promoting to ``float``, +performing the operation, and then truncating to ``_Float16``. ``_Float16`` will be supported on more targets as they define ABIs for it. 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c884f74..f551a6f 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -513,6 +513,9 @@ X86 Support in Clang - Support ``-mharden-sls=[none|all|return|indirect-jmp]`` for straight-line speculation hardening. +- Support for the ``_Float16`` type has been added for all targets with SSE2. + When AVX512-FP16 is not available, arithmetic on ``_Float16`` is emulated + using ``float``. DWARF Support in Clang ---------------------- diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index b83b351..0b3d8783 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -239,7 +239,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasAVX512ER = true; } else if (Feature == "+avx512fp16") { HasAVX512FP16 = true; - HasFloat16 = true; } else if (Feature == "+avx512pf") { HasAVX512PF = true; } else if (Feature == "+avx512dq") { @@ -355,6 +354,9 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, .Default(NoSSE); SSELevel = std::max(SSELevel, Level); + // Turn on _Float16 for x86 (feature sse2) + HasFloat16 = SSELevel >= SSE2; + MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch<MMX3DNowEnum>(Feature) .Case("+3dnowa", AMD3DNowAthlon) .Case("+3dnow", AMD3DNow) diff --git a/clang/test/CodeGen/X86/Float16-arithmetic.c b/clang/test/CodeGen/X86/Float16-arithmetic.c new file mode 100644 index 0000000..726da22 --- /dev/null +++ b/clang/test/CodeGen/X86/Float16-arithmetic.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK + +// CHECK-NOT: fpext +// CHECK-NOT: fptrunc + +_Float16 add1(_Float16 a, _Float16 b) { + return a + b; +} + +_Float16 add2(_Float16 a, _Float16 b, _Float16 c) { + return a + b + c; +} + +_Float16 div(_Float16 a, _Float16 b) { + return a / b; +} + +_Float16 mul(_Float16 a, _Float16 b) { + return a * b; +} + +_Float16 
add_and_mul1(_Float16 a, _Float16 b, _Float16 c, _Float16 d) { + return a * b + c * d; +} + +_Float16 add_and_mul2(_Float16 a, _Float16 b, _Float16 c, _Float16 d) { + return (a - 6 * b) + c; +} diff --git a/clang/test/CodeGen/X86/avx512fp16-complex.c b/clang/test/CodeGen/X86/Float16-complex.c similarity index 96% rename from clang/test/CodeGen/X86/avx512fp16-complex.c rename to clang/test/CodeGen/X86/Float16-complex.c index 8a6b50e..ebb290c 100644 --- a/clang/test/CodeGen/X86/avx512fp16-complex.c +++ b/clang/test/CodeGen/X86/Float16-complex.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefix=X86 +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=X86 _Float16 _Complex add_half_rr(_Float16 a, _Float16 b) { // X86-LABEL: @add_half_rr( diff --git a/clang/test/Sema/Float16.c b/clang/test/Sema/Float16.c index f0b9466..26c604f 100644 --- a/clang/test/Sema/Float16.c +++ b/clang/test/Sema/Float16.c @@ -1,5 +1,6 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s -DHAVE +// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE +// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE // RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE // RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE // RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE diff --git a/clang/test/Sema/conversion-target-dep.c b/clang/test/Sema/conversion-target-dep.c index 958a4d8..1b22cb8 100644 --- a/clang/test/Sema/conversion-target-dep.c +++ b/clang/test/Sema/conversion-target-dep.c @@ -6,7 +6,7 @@ long double ld; double d; -_Float16 f16; // 
x86-error {{_Float16 is not supported on this target}} +_Float16 f16; int main(void) { ld = d; // x86-warning {{implicit conversion increases floating-point precision: 'double' to 'long double'}} diff --git a/clang/test/SemaCXX/Float16.cpp b/clang/test/SemaCXX/Float16.cpp index f27c383..61b02a5 100644 --- a/clang/test/SemaCXX/Float16.cpp +++ b/clang/test/SemaCXX/Float16.cpp @@ -1,4 +1,6 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple i686-linux-pc -target-feature +sse2 %s -DHAVE +// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -DHAVE // RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE // RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE // RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE -- 2.7.4