From 2c8f49e67bf6ea08aca1f69bb4d40ce0fbeac4f0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 12 Nov 2016 23:24:34 +0000 Subject: [PATCH] [AVX-512] Use scalar vfmsub/vfnmsub mask3 intrinsics instead of inverting the mask argument of a vfmadd intrinsic. Summary: Inverting the mask argument does not reflect the intended semantics of the intrinsic. Reviewers: igorb, delena Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D26019 llvm-svn: 286733 --- clang/include/clang/Basic/BuiltinsX86.def | 4 ++++ clang/lib/Headers/avx512fintrin.h | 32 +++++++++++++++---------------- clang/test/CodeGen/avx512f-builtins.c | 16 ++++++++-------- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 07b20b3..e65fb11 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1743,6 +1743,10 @@ TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask3, "V4fV4fV4fV4fUcIi", "", "avx512f" TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask, "V2dV2dV2dV2dUcIi", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_maskz, "V2dV2dV2dV2dUcIi", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask3, "V2dV2dV2dV2dUcIi", "", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfnmsubss3_mask3, "V4fV4fV4fV4fUcIi", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_permvarhi512_mask, "V32sV32sV32sV32sUi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_permvardf512_mask, "V8dV8dV8LLiV8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_permvardi512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f") diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index d7f8cf4..1992bd84 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8470,17 +8470,17 @@ _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { - return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, + return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W, (__v4sf) __X, - -(__v4sf) __Y, + (__v4sf) __Y, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\ - (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ + (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ (__v4sf)(__m128)(X), \ - -(__v4sf)(__m128)(Y), (__mmask8)(U), \ + (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R)); }) static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -8566,17 +8566,17 @@ _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { - return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W, + return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W, (__v4sf) __X, - -(__v4sf) __Y, + (__v4sf) __Y, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\ - (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \ + (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \ (__v4sf)(__m128)(X), \ - -(__v4sf)(__m128)(Y), (__mmask8)(U), \ + (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -8662,17 +8662,17 @@ _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { - return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, + return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W, (__v2df) __X, - -(__v2df) __Y, + (__v2df) __Y, (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\ - (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ + (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(X), \ - -(__v2df)(__m128d)(Y), \ + (__v2df)(__m128d)(Y), \ (__mmask8)(U), (int)(R)); }) static __inline__ __m128d __DEFAULT_FN_ATTRS @@ -8759,17 +8759,17 @@ _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { - return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) (__W), + return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W), (__v2df) __X, - -(__v2df) (__Y), + (__v2df) (__Y), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\ - (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \ + (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \ (__v2df)(__m128d)(X), \ - -(__v2df)(__m128d)(Y), \ + (__v2df)(__m128d)(Y), \ (__mmask8)(U), (int)(R)); }) #define _mm512_permutex_pd(X, C) __extension__ ({ \ diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c index 0801177..1e3baf0 100644 --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -5813,13 +5813,13 @@ __m128 test_mm_maskz_fmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __m128 test_mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ // CHECK-LABEL: @test_mm_mask3_fmsub_ss - // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss + // CHECK: @llvm.x86.avx512.mask3.vfmsub.ss return _mm_mask3_fmsub_ss(__W, __X, __Y, __U); } __m128 test_mm_mask3_fmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ // CHECK-LABEL: @test_mm_mask3_fmsub_round_ss - // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss + // CHECK: @llvm.x86.avx512.mask3.vfmsub.ss return _mm_mask3_fmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION); } @@ -5885,13 +5885,13 @@ __m128 test_mm_maskz_fnmsub_round_ss(__mmask8 __U, __m128 __A, __m128 __B, __m12 __m128 test_mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ // CHECK-LABEL: @test_mm_mask3_fnmsub_ss - // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss + // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ss return _mm_mask3_fnmsub_ss(__W, __X, __Y, __U); } __m128 test_mm_mask3_fnmsub_round_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U){ // CHECK-LABEL: @test_mm_mask3_fnmsub_round_ss - // CHECK: @llvm.x86.avx512.mask3.vfmadd.ss + // CHECK: @llvm.x86.avx512.mask3.vfnmsub.ss return _mm_mask3_fnmsub_round_ss(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION); } @@ -5957,13 +5957,13 @@ __m128d test_mm_maskz_fmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __m __m128d test_mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ // CHECK-LABEL: @test_mm_mask3_fmsub_sd - // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd + // CHECK: @llvm.x86.avx512.mask3.vfmsub.sd return _mm_mask3_fmsub_sd(__W, __X, __Y, __U); } __m128d test_mm_mask3_fmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ // CHECK-LABEL: @test_mm_mask3_fmsub_round_sd - // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd + // CHECK: @llvm.x86.avx512.mask3.vfmsub.sd return _mm_mask3_fmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION); } @@ -6029,13 +6029,13 @@ __m128d test_mm_maskz_fnmsub_round_sd(__mmask8 __U, __m128d __A, __m128d __B, __ __m128d test_mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ // CHECK-LABEL: @test_mm_mask3_fnmsub_sd - // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd + // CHECK: @llvm.x86.avx512.mask3.vfnmsub.sd return _mm_mask3_fnmsub_sd(__W, __X, __Y, __U); } __m128d test_mm_mask3_fnmsub_round_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U){ // CHECK-LABEL: @test_mm_mask3_fnmsub_round_sd - // CHECK: @llvm.x86.avx512.mask3.vfmadd.sd + // CHECK: @llvm.x86.avx512.mask3.vfnmsub.sd return _mm_mask3_fnmsub_round_sd(__W, __X, __Y, __U, _MM_FROUND_CUR_DIRECTION); } -- 2.7.4