}
}
+ // Returns true when floating-point reassociation may be applied to node N:
+ // either unsafe FP math is enabled in Options, or N itself carries the
+ // allow-reassociation flag.
+ // Options is captured by reference so the options object is not copied into
+ // the closure. NOTE(review): assumes this closure is only used while Options
+ // is alive (it is a local helper here) - confirm it is never stored.
+ auto isReassociable = [&Options](SDNode *N) {
+ return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
+ };
+
+ // Returns true when N satisfies isContractableFMUL and its node is also
+ // reassociable; the folds below require both before rewriting an FMUL
+ // operand into a nested fused operation.
+ // The two helper closures are captured by reference to avoid duplicating
+ // their captured state. NOTE(review): assumes this predicate is only called
+ // within the enclosing function, while both helpers are still in scope.
+ auto isContractableAndReassociableFMUL = [&isContractableFMUL,
+ &isReassociable](SDValue N) {
+ return isContractableFMUL(N) && isReassociable(N.getNode());
+ };
+
// More folding opportunities when target permits.
- if (Aggressive) {
+ if (Aggressive && isReassociable(N)) {
bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
- isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
- N0.getOperand(2)->hasOneUse()) {
+ isContractableAndReassociableFMUL(N0.getOperand(2)) &&
+ N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
- isContractableFMUL(N1.getOperand(2)) &&
+ isContractableAndReassociableFMUL(N1.getOperand(2)) &&
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
}
-
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode &&
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
- if (isContractableFMUL(N020) &&
+ if (isContractableAndReassociableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N020.getValueType())) {
return DAG.getNode(
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
- if (isContractableFMUL(N002) &&
+ if (isContractableAndReassociableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(
N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
- if (isContractableFMUL(N120) &&
+ if (isContractableAndReassociableFMUL(N120) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);
SDValue N100 = CvtSrc.getOperand(0);
SDValue N101 = CvtSrc.getOperand(1);
SDValue N102 = CvtSrc.getOperand(2);
- if (isContractableFMUL(N102) &&
+ if (isContractableAndReassociableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);
; GFX9-F32DENORM-NEXT: s_setpc_b64
+ ; fsub (fmuladd x, y, (fpext (fmul u, v))), z
+ ; Both the fmul and the fsub carry 'reassoc': the aggressive fsub->fma folds
+ ; are now gated on reassociation being allowed for the fsub node and for the
+ ; inner fmul, so without the flags this pattern would presumably no longer be
+ ; combined.
define float @fsub_muladd_fpext_mul_f16_to_f32(float %x, float %y, float %z, half %u, half %v) #0 {
entry:
- %mul = fmul half %u, %v
+ %mul = fmul reassoc half %u, %v
%mul.ext = fpext half %mul to float
%fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext)
- %add = fsub float %fma, %z
+ %add = fsub reassoc float %fma, %z
ret float %add
}
; GFX9-F32DENORM-NEXT: s_setpc_b64
+ ; fsub x, (fmuladd y, z, (fpext (fmul u, v)))
+ ; Commuted form: the fused multiply-add is the subtrahend. As in the
+ ; non-commuted test, 'reassoc' is placed on both the fmul and the fsub because
+ ; the aggressive fsub->fma folds now require reassociation to be allowed on
+ ; the fsub node and on the inner fmul.
define float @fsub_muladd_fpext_mul_f16_to_f32_commute(float %x, float %y, float %z, half %u, half %v) #0 {
entry:
- %mul = fmul half %u, %v
+ %mul = fmul reassoc half %u, %v
%mul.ext = fpext half %mul to float
%fma = call float @llvm.fmuladd.f32(float %y, float %z, float %mul.ext)
- %add = fsub float %x, %fma
+ %add = fsub reassoc float %x, %fma
ret float %add
}
define double @test_FMSUB_ASSOC_EXT1(float %A, float %B, double %C,
; CHECK-LABEL: test_FMSUB_ASSOC_EXT1:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmsub 0, 1, 2, 5
-; CHECK-NEXT: fmadd 1, 3, 4, 0
+; CHECK-NEXT: fmuls 0, 1, 2
+; CHECK-NEXT: fmadd 0, 3, 4, 0
+; CHECK-NEXT: fsub 1, 0, 5
; CHECK-NEXT: blr
;
; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT1:
; CHECK-VSX: # %bb.0:
-; CHECK-VSX-NEXT: xsmsubmdp 1, 2, 5
-; CHECK-VSX-NEXT: xsmaddadp 1, 3, 4
+; CHECK-VSX-NEXT: fmuls 0, 1, 2
+; CHECK-VSX-NEXT: xsmaddadp 0, 3, 4
+; CHECK-VSX-NEXT: xssubdp 1, 0, 5
; CHECK-VSX-NEXT: blr
double %D, double %E) {
%F = fmul float %A, %B ; <float> [#uses=1]
define double @test_FMSUB_ASSOC_EXT2(float %A, float %B, float %C,
; CHECK-LABEL: test_FMSUB_ASSOC_EXT2:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmsub 0, 3, 4, 5
-; CHECK-NEXT: fmadd 1, 1, 2, 0
+; CHECK-NEXT: fmuls 0, 3, 4
+; CHECK-NEXT: fmadds 0, 1, 2, 0
+; CHECK-NEXT: fsub 1, 0, 5
; CHECK-NEXT: blr
;
; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT2:
; CHECK-VSX: # %bb.0:
-; CHECK-VSX-NEXT: xsmsubmdp 3, 4, 5
-; CHECK-VSX-NEXT: xsmaddadp 3, 1, 2
-; CHECK-VSX-NEXT: fmr 1, 3
+; CHECK-VSX-NEXT: fmuls 0, 3, 4
+; CHECK-VSX-NEXT: fmadds 0, 1, 2, 0
+; CHECK-VSX-NEXT: xssubdp 1, 0, 5
; CHECK-VSX-NEXT: blr
float %D, double %E) {
%F = fmul float %A, %B ; <float> [#uses=1]
define double @test_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
; CHECK-LABEL: test_FMSUB_ASSOC_EXT3:
; CHECK: # %bb.0:
-; CHECK-NEXT: fneg 0, 1
-; CHECK-NEXT: fmadd 0, 0, 2, 5
-; CHECK-NEXT: fneg 1, 3
-; CHECK-NEXT: fmadd 1, 1, 4, 0
+; CHECK-NEXT: fmuls 0, 1, 2
+; CHECK-NEXT: fmadd 0, 3, 4, 0
+; CHECK-NEXT: fsub 1, 5, 0
; CHECK-NEXT: blr
;
; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT3:
; CHECK-VSX: # %bb.0:
-; CHECK-VSX-NEXT: xsnegdp 1, 1
-; CHECK-VSX-NEXT: xsnegdp 0, 3
-; CHECK-VSX-NEXT: xsmaddmdp 1, 2, 5
-; CHECK-VSX-NEXT: xsmaddadp 1, 0, 4
+; CHECK-VSX-NEXT: fmuls 0, 1, 2
+; CHECK-VSX-NEXT: xsmaddadp 0, 3, 4
+; CHECK-VSX-NEXT: xssubdp 1, 5, 0
; CHECK-VSX-NEXT: blr
double %D, double %E) {
%F = fmul float %A, %B ; <float> [#uses=1]
define double @test_FMSUB_ASSOC_EXT4(float %A, float %B, float %C,
; CHECK-LABEL: test_FMSUB_ASSOC_EXT4:
; CHECK: # %bb.0:
-; CHECK-NEXT: fneg 0, 3
-; CHECK-NEXT: fmadd 0, 0, 4, 5
-; CHECK-NEXT: fneg 1, 1
-; CHECK-NEXT: fmadd 1, 1, 2, 0
+; CHECK-NEXT: fmuls 0, 3, 4
+; CHECK-NEXT: fmadds 0, 1, 2, 0
+; CHECK-NEXT: fsub 1, 5, 0
; CHECK-NEXT: blr
;
; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT4:
; CHECK-VSX: # %bb.0:
-; CHECK-VSX-NEXT: xsnegdp 0, 3
-; CHECK-VSX-NEXT: xsnegdp 1, 1
-; CHECK-VSX-NEXT: xsmaddmdp 0, 4, 5
-; CHECK-VSX-NEXT: xsmaddadp 0, 1, 2
-; CHECK-VSX-NEXT: fmr 1, 0
+; CHECK-VSX-NEXT: fmuls 0, 3, 4
+; CHECK-VSX-NEXT: fmadds 0, 1, 2, 0
+; CHECK-VSX-NEXT: xssubdp 1, 5, 0
; CHECK-VSX-NEXT: blr
float %D, double %E) {
%F = fmul float %A, %B ; <float> [#uses=1]