[X86] Specifically limit fmin/fmax commutativity to NoNaNs + NoSignedZeros

author Benjamin Kramer <benny.kra@googlemail.com>

Tue, 5 Nov 2019 18:12:44 +0000 (19:12 +0100)

committer Benjamin Kramer <benny.kra@googlemail.com>

Tue, 5 Nov 2019 18:34:06 +0000 (19:34 +0100)
author Benjamin Kramer <benny.kra@googlemail.com>
Tue, 5 Nov 2019 18:12:44 +0000 (19:12 +0100)
committer Benjamin Kramer <benny.kra@googlemail.com>
Tue, 5 Nov 2019 18:34:06 +0000 (19:34 +0100)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index cec0454..ea4d43b 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41986,8 +41986,9 @@ static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
  static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) {
    assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
  
-  // Only perform optimizations if UnsafeMath is used.
-  if (!DAG.getTarget().Options.UnsafeFPMath)
+  // FMIN/FMAX are commutative if no NaNs and no negative zeros are allowed.
+  if (!DAG.getTarget().Options.NoNaNsFPMath ||
+      !DAG.getTarget().Options.NoSignedZerosFPMath)
      return SDValue();
  
    // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
diff --git a/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll b/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll

index 00c9e4c..9282ed1 100644 (file)
--- a/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll
+++ b/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll
@@ -1,5 +1,8 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=CHECK_UNSAFE --check-prefix=AVX512F_UNSAFE
+; RUN: llc < %s -mtriple=x86_64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=CHECK_UNSAFE --check-prefix=AVX512F_UNSAFE
+; RUN: llc < %s -mtriple=x86_64 -enable-no-nans-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64 -enable-no-signed-zeros-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
  ; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
  
  define <16 x float> @test_max_v16f32(<16 x float> * %a_ptr, <16 x float> %b)  {
diff --git a/llvm/test/CodeGen/X86/machine-combiner.ll b/llvm/test/CodeGen/X86/machine-combiner.ll

index 162cda8..22da63a 100644 (file)
--- a/llvm/test/CodeGen/X86/machine-combiner.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner.ll
@@ -1,13 +1,13 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX512
  
  ; Incremental updates of the instruction depths should be enough for this test
  ; case.
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX512
  
  ; Verify that the first two adds are independent regardless of how the inputs are
  ; commuted. The destination registers are used as source registers for the third add.
diff --git a/llvm/test/CodeGen/X86/sse-minmax.ll b/llvm/test/CodeGen/X86/sse-minmax.ll

index 178a74b..0f703d6 100644 (file)
--- a/llvm/test/CodeGen/X86/sse-minmax.ll
+++ b/llvm/test/CodeGen/X86/sse-minmax.ll
@@ -1,7 +1,7 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2                                                 | FileCheck %s --check-prefix=ALL --check-prefix=STRICT
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 -enable-unsafe-fp-math -enable-no-nans-fp-math  | FileCheck %s --check-prefix=ALL --check-prefix=RELAX --check-prefix=UNSAFE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 -enable-no-nans-fp-math                         | FileCheck %s --check-prefix=ALL --check-prefix=RELAX --check-prefix=FINITE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2                                                          | FileCheck %s --check-prefix=ALL --check-prefix=STRICT
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 -enable-no-signed-zeros-fp-math -enable-no-nans-fp-math  | FileCheck %s --check-prefix=ALL --check-prefix=RELAX --check-prefix=UNSAFE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 -enable-no-nans-fp-math                                  | FileCheck %s --check-prefix=ALL --check-prefix=RELAX --check-prefix=FINITE
  
  ; Some of these patterns can be matched as SSE min or max. Some of
  ; them can be matched provided that the operands are swapped.
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll

index 39882cd..9ad8e7b 100644 (file)
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
@@ -2051,4 +2051,4 @@ define <16 x float> @stack_fold_permilpsvar_zmm_maskz(<16 x float> %a0, <16 x i3
  }
  
  attributes #0 = { "unsafe-fp-math"="false" }
-attributes #1 = { "unsafe-fp-math"="true" }
+attributes #1 = { "unsafe-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
author	Benjamin Kramer <benny.kra@googlemail.com>
	Tue, 5 Nov 2019 18:12:44 +0000 (19:12 +0100)
committer	Benjamin Kramer <benny.kra@googlemail.com>
	Tue, 5 Nov 2019 18:34:06 +0000 (19:34 +0100)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll		patch \| blob \| history
llvm/test/CodeGen/X86/machine-combiner.ll		patch \| blob \| history
llvm/test/CodeGen/X86/sse-minmax.ll		patch \| blob \| history
llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll		patch \| blob \| history