def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>;
def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>;
def frem : SDNode<"ISD::FREM" , SDTFPBinOp>;
-def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp>;
-def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp>;
+def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp, [SDNPCommutative]>;
+def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp, [SDNPCommutative]>;
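Marking fma, fmad and strict_fma with SDNPCommutative tells the DAG ISel pattern matcher that their first two (multiplicand) operands may be swapped, so every source pattern is automatically tried in the commuted operand order as well. That is what makes the hand-written swapped-operand duplicates deleted throughout this change redundant. As a sketch, using a pattern that appears below:

  // This single pattern...
  def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
            (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
  // ...now also matches the commuted form
  //   (fma FPR64:$Rm, (fneg FPR64:$Rn), (fneg FPR64:$Ra)),
  // so the explicit middle-operand-negated variant can be dropped.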
def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>;
def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
def strict_frem : SDNode<"ISD::STRICT_FREM",
SDTFPBinOp, [SDNPHasChain]>;
def strict_fma : SDNode<"ISD::STRICT_FMA",
- SDTFPTernaryOp, [SDNPHasChain]>;
+ SDTFPTernaryOp, [SDNPHasChain, SDNPCommutative]>;
def strict_fsqrt : SDNode<"ISD::STRICT_FSQRT",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fsin : SDNode<"ISD::STRICT_FSIN",
def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
(FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-// And here "(-a) + b*(-c)"
-
-let Predicates = [HasNEON, HasFullFP16] in
-def : Pat<(f16 (fma FPR16:$Rn, (fneg FPR16:$Rm), (fneg FPR16:$Ra))),
- (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
-
-def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
- (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-
-def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))),
- (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-
//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//
defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
-// The following def pats catch the case where the LHS of an FMA is negated.
-// The TriOpFrag above catches the case where the middle operand is negated.
-def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)),
- (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>;
-
-def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
- (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>;
-
-def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
- (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;
-
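With fma commutative, the TriOpFrag bound to FMLS above covers both negation positions, so the explicit LHS-negated vector patterns deleted here are subsumed. A sketch of the equivalence for the v2f32 instance (operand names as in the deleted patterns):

  // TriOpFrag instantiation:          (fma V64:$Rn, (fneg V64:$Rm), V64:$Rd)
  // Commuted form, matched for free:  (fma (fneg V64:$Rm), V64:$Rn, V64:$Rd)
  // Both select FMLSv2f32 $Rd, ..., computing $Rd - $Rn * $Rm either way,
  // since swapping the multiplicands does not change the product.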
defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
if fms then {
def : Pat<(VTI.Vec (fma (fneg m1), m2, add)),
(Inst $add, $m1, $m2)>;
- def : Pat<(VTI.Vec (fma m1, (fneg m2), add)),
- (Inst $add, $m1, $m2)>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma (fneg m1), m2, add)),
add)),
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
- def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
- (VTI.Vec (fma m1, (fneg m2), add)),
- add)),
- (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
def : Pat<(VTI.Vec (pred_int (fneg m1), m2, add, pred)),
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
def : Pat<(VTI.Vec (pred_int m1, (fneg m2), add, pred)),
def : Pat<(f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin))),
(VFMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
-// (fma x, (fneg y), z) -> (vfms z, x, y)
-def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
- (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
- (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
- Requires<[HasVFP4]>;
-def : Pat<(f16 (fma (f16 HPR:$Sn), (fneg (f16 HPR:$Sm)), (f16 HPR:$Sdin))),
- (VFMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
- Requires<[HasFullFP16]>;
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
def : Pat<(fneg (f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin)))),
(VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
Requires<[HasFullFP16]>;
-// (fneg (fma x, (fneg y), z) -> (vfnms z, x, y)
-def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
- (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
- (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
- Requires<[HasVFP4]>;
-def : Pat<(fneg (f16 (fma (f16 HPR:$Sn), (fneg (f16 HPR:$Sm)), (f16 HPR:$Sdin)))),
- (VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
- Requires<[HasFullFP16]>;
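The scalar VFP VFMS/VFNMS cases follow the same scheme: the kept patterns negate the first multiplicand, and commutativity extends them to the second-multiplicand forms deleted above. A sketch of the surviving f32 VFMS pattern, inferred by mirroring the deleted one (the file's own "(fma (fneg x), y, z) -> (vfms z, x, y)" convention):

  def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
            (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
        Requires<[HasVFP4]>;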
//===----------------------------------------------------------------------===//
// FP Conditional moves.
(F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
(F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
-def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
- (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
(PS_vmulw V2I32:$Rs, V2I32:$Rt)>;
// fnmsub: -rs1 * rs2 + rs3
def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3),
(FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
-def : Pat<(fma FPR64:$rs1, (fneg FPR64:$rs2), FPR64:$rs3),
- (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
// fnmadd: -rs1 * rs2 - rs3
def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)),
(FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
-def : Pat<(fma FPR64:$rs1, (fneg FPR64:$rs2), (fneg FPR64:$rs3)),
- (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
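A worked check of the arithmetic behind these deletions: fnmsub computes -(rs1 * rs2) + rs3 and fnmadd computes -(rs1 * rs2) - rs3, and the product is unchanged by swapping multiplicands, so negating either one reaches the same instruction once fma is commutative:

  // fma(-a,  b,  c) = -(a*b) + c  -> FNMSUB_D  (kept pattern)
  // fma( a, -b,  c) = -(a*b) + c  -> FNMSUB_D  (now matched by commutation)
  // fma(-a,  b, -c) = -(a*b) - c  -> FNMADD_D  (kept pattern)
  // fma( a, -b, -c) = -(a*b) - c  -> FNMADD_D  (now matched by commutation)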
// The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
canonical NaN when given a signaling NaN. This doesn't match the LLVM
// fnmsub: -rs1 * rs2 + rs3
def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3),
(FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
-def : Pat<(fma FPR32:$rs1, (fneg FPR32:$rs2), FPR32:$rs3),
- (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
// fnmadd: -rs1 * rs2 - rs3
def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)),
(FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
-def : Pat<(fma FPR32:$rs1, (fneg FPR32:$rs2), (fneg FPR32:$rs3)),
- (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
// The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
// canonical NaN when given a signaling NaN. This doesn't match the LLVM
avx512vl_f64_info, "PD">, VEX_W;
}
-defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd,
+defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
X86Fmadd, X86FmaddRnd>;
defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
X86Fmsub, X86FmsubRnd>;
avx512vl_f64_info, "PD">, VEX_W;
}
-defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd,
+defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
X86Fmadd, X86FmaddRnd>;
defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
X86Fmsub, X86FmsubRnd>;
avx512vl_f64_info, "PD">, VEX_W;
}
-defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd,
+defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
X86Fmadd, X86FmaddRnd>;
defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
X86Fmsub, X86FmsubRnd>;
}
}
-defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
+defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
}
}
-defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
+defm : avx512_scalar_fma_patterns<any_fma, X86Fmadd, X86FmaddRnd, "VFMADD",
"SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
"SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
"SS", X86Movss, v4f32x_info, fp32imm0>;
-defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
+defm : avx512_scalar_fma_patterns<any_fma, X86Fmadd, X86FmaddRnd, "VFMADD",
"SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
"SD", X86Movsd, v2f64x_info, fp64imm0>;
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS",
- loadv4f32, loadv8f32, X86any_Fmadd, v4f32, v8f32,
+ loadv4f32, loadv8f32, any_fma, v4f32, v8f32,
SchedWriteFMA>;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS",
loadv4f32, loadv8f32, X86any_Fmsub, v4f32, v8f32,
let ExeDomain = SSEPackedDouble in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD",
- loadv2f64, loadv4f64, X86any_Fmadd, v2f64,
+ loadv2f64, loadv4f64, any_fma, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
loadv2f64, loadv4f64, X86any_Fmsub, v2f64,
VR128, sdmem, sched>, VEX_W;
}
-defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86any_Fmadd,
+defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", any_fma,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86any_Fmsub,
SchedWriteFMA.Scl>, VEX_LIG;
}
}
-defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
+defm : scalar_fma_patterns<any_fma, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86any_Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86any_Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86any_Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
-defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
+defm : scalar_fma_patterns<any_fma, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86any_Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86any_Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86any_Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
let ExeDomain = SSEPackedSingle in {
// Scalar Instructions
- defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86any_Fmadd, loadf32,
+ defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, any_fma, loadf32,
SchedWriteFMA.Scl>,
fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
// Packed Instructions
- defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86any_Fmadd, v4f32, v8f32,
+ defm VFMADDPS4 : fma4p<0x68, "vfmaddps", any_fma, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86any_Fmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
let ExeDomain = SSEPackedDouble in {
// Scalar Instructions
- defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86any_Fmadd, loadf64,
+ defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, any_fma, loadf64,
SchedWriteFMA.Scl>,
fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
// Packed Instructions
- defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86any_Fmadd, v2f64, v4f64,
+ defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", any_fma, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86any_Fmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
}
}
-defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<any_fma, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<any_fma, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;
def X86Fmadd : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>;
def X86strict_Fmadd : SDNode<"ISD::STRICT_FMA", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
-def X86any_Fmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
- [(X86strict_Fmadd node:$src1, node:$src2, node:$src3),
- (X86Fmadd node:$src1, node:$src2, node:$src3)]>;
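X86any_Fmadd duplicated the target-independent any_fma fragment, which is why the X86 FMA patterns above switch to any_fma directly. For reference, a sketch of the generic fragment, assumed to mirror the removed X86 definition (the authoritative def lives in TargetSelectionDAG.td):

  def any_fma : PatFrags<(ops node:$src1, node:$src2, node:$src3),
                         [(strict_fma node:$src1, node:$src2, node:$src3),
                          (fma node:$src1, node:$src2, node:$src3)]>;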
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
def X86any_Fnmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
; CHECK: liveins: $x0
; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load 1)
- ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[LDRBui]]
- ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY2]], 0, 0
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[LDRBui]]
+ ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 0
; CHECK: $noreg = PATCHABLE_RET [[UBFMWri]]
%2:gpr(p0) = COPY $x0
%0:fpr(s1) = G_LOAD %2(p0) :: (load 1)
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s2
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
- ; CHECK: [[FNMADDSrrr:%[0-9]+]]:fpr32 = FNMADDSrrr [[COPY2]], [[COPY1]], [[COPY]]
+ ; CHECK: [[FNMADDSrrr:%[0-9]+]]:fpr32 = FNMADDSrrr [[COPY1]], [[COPY2]], [[COPY]]
; CHECK: $noreg = PATCHABLE_RET [[FNMADDSrrr]]
%5:fpr(s32) = COPY $s2
%4:fpr(s32) = COPY $s1
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0
- ; CHECK: [[FNMADDDrrr:%[0-9]+]]:fpr64 = FNMADDDrrr [[COPY2]], [[COPY1]], [[COPY]]
+ ; CHECK: [[FNMADDDrrr:%[0-9]+]]:fpr64 = FNMADDDrrr [[COPY1]], [[COPY2]], [[COPY]]
; CHECK: $noreg = PATCHABLE_RET [[FNMADDDrrr]]
%5:fpr(s64) = COPY $d2
%4:fpr(s64) = COPY $d1
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0
- ; CHECK: [[FMLSv2f32_:%[0-9]+]]:fpr64 = FMLSv2f32 [[COPY1]], [[COPY]], [[COPY2]]
+ ; CHECK: [[FMLSv2f32_:%[0-9]+]]:fpr64 = FMLSv2f32 [[COPY1]], [[COPY2]], [[COPY]]
; CHECK: $noreg = PATCHABLE_RET [[FMLSv2f32_]]
%4:fpr(<2 x s32>) = COPY $d2
%3:fpr(<2 x s32>) = COPY $d1
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2
; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0
- ; CHECK: [[FMLSv4f32_:%[0-9]+]]:fpr128 = FMLSv4f32 [[COPY1]], [[COPY]], [[COPY2]]
+ ; CHECK: [[FMLSv4f32_:%[0-9]+]]:fpr128 = FMLSv4f32 [[COPY1]], [[COPY2]], [[COPY]]
; CHECK: $noreg = PATCHABLE_RET [[FMLSv4f32_]]
%4:fpr(<4 x s32>) = COPY $q2
%3:fpr(<4 x s32>) = COPY $q1
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2
; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0
- ; CHECK: [[FMLSv2f64_:%[0-9]+]]:fpr128 = FMLSv2f64 [[COPY1]], [[COPY]], [[COPY2]]
+ ; CHECK: [[FMLSv2f64_:%[0-9]+]]:fpr128 = FMLSv2f64 [[COPY1]], [[COPY2]], [[COPY]]
; CHECK: $noreg = PATCHABLE_RET [[FMLSv2f64_]]
%4:fpr(<2 x s64>) = COPY $q2
%3:fpr(<2 x s64>) = COPY $q1
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: ldr d2, [x1]
; CHECK-NEXT: ldr d0, [x2]
-; CHECK-NEXT: fmls.2s v0, v2, v1
+; CHECK-NEXT: fmls.2s v0, v1, v2
; CHECK-NEXT: ret
%tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = load <2 x float>, <2 x float>* %B
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: fmls.4s v0, v2, v1
+; CHECK-NEXT: fmls.4s v0, v1, v2
; CHECK-NEXT: ret
%tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = load <4 x float>, <4 x float>* %B
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q0, [x2]
-; CHECK-NEXT: fmls.2d v0, v2, v1
+; CHECK-NEXT: fmls.2d v0, v1, v2
; CHECK-NEXT: ret
%tmp1 = load <2 x double>, <2 x double>* %A
%tmp2 = load <2 x double>, <2 x double>* %B
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
- ; CHECK: [[VFMSD:%[0-9]+]]:dpr = VFMSD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFMSD:%[0-9]+]]:dpr = VFMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
; CHECK: $d0 = COPY [[VFMSD]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
- ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = VFNMSD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = VFNMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
; CHECK: $d0 = COPY [[VFNMSD]]
; CHECK: MOVPCLR 14 /* CC::al */, $noreg, implicit $d0
%0:fprb(s64) = COPY $d0
define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms2:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr.16 s0, [r0]
-; CHECK-NEXT: vldr.16 s2, [r1]
+; CHECK-NEXT: vldr.16 s0, [r1]
+; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
;
; DONT-FUSE-LABEL: fms2:
; DONT-FUSE: @ %bb.0:
-; DONT-FUSE-NEXT: vldr.16 s0, [r0]
-; DONT-FUSE-NEXT: vldr.16 s2, [r1]
+; DONT-FUSE-NEXT: vldr.16 s0, [r1]
+; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms3:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr.16 s0, [r1]
-; CHECK-NEXT: vldr.16 s2, [r0]
+; CHECK-NEXT: vldr.16 s0, [r0]
+; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
;
; DONT-FUSE-LABEL: fnms3:
; DONT-FUSE: @ %bb.0:
-; DONT-FUSE-NEXT: vldr.16 s0, [r1]
-; DONT-FUSE-NEXT: vldr.16 s2, [r0]
+; DONT-FUSE-NEXT: vldr.16 s0, [r0]
+; DONT-FUSE-NEXT: vldr.16 s2, [r1]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; RV32IFD-NEXT: fcvt.d.w ft3, zero
; RV32IFD-NEXT: fadd.d ft2, ft2, ft3
; RV32IFD-NEXT: fadd.d ft1, ft1, ft3
-; RV32IFD-NEXT: fnmadd.d ft0, ft0, ft2, ft1
+; RV32IFD-NEXT: fnmadd.d ft0, ft2, ft0, ft1
; RV32IFD-NEXT: fsd ft0, 8(sp)
; RV32IFD-NEXT: lw a0, 8(sp)
; RV32IFD-NEXT: lw a1, 12(sp)
; RV64IFD-NEXT: fmv.d.x ft3, zero
; RV64IFD-NEXT: fadd.d ft2, ft2, ft3
; RV64IFD-NEXT: fadd.d ft1, ft1, ft3
-; RV64IFD-NEXT: fnmadd.d ft0, ft0, ft2, ft1
+; RV64IFD-NEXT: fnmadd.d ft0, ft2, ft0, ft1
; RV64IFD-NEXT: fmv.x.d a0, ft0
; RV64IFD-NEXT: ret
%b_ = fadd double 0.0, %b
; RV32IFD-NEXT: fld ft2, 8(sp)
; RV32IFD-NEXT: fcvt.d.w ft3, zero
; RV32IFD-NEXT: fadd.d ft2, ft2, ft3
-; RV32IFD-NEXT: fnmsub.d ft0, ft1, ft2, ft0
+; RV32IFD-NEXT: fnmsub.d ft0, ft2, ft1, ft0
; RV32IFD-NEXT: fsd ft0, 8(sp)
; RV32IFD-NEXT: lw a0, 8(sp)
; RV32IFD-NEXT: lw a1, 12(sp)
; RV64IFD-NEXT: fmv.d.x ft2, a1
; RV64IFD-NEXT: fmv.d.x ft3, zero
; RV64IFD-NEXT: fadd.d ft2, ft2, ft3
-; RV64IFD-NEXT: fnmsub.d ft0, ft1, ft2, ft0
+; RV64IFD-NEXT: fnmsub.d ft0, ft2, ft1, ft0
; RV64IFD-NEXT: fmv.x.d a0, ft0
; RV64IFD-NEXT: ret
%b_ = fadd double 0.0, %b
; RV32IF-NEXT: fmv.w.x ft3, zero
; RV32IF-NEXT: fadd.s ft2, ft2, ft3
; RV32IF-NEXT: fadd.s ft1, ft1, ft3
-; RV32IF-NEXT: fnmadd.s ft0, ft0, ft2, ft1
+; RV32IF-NEXT: fnmadd.s ft0, ft2, ft0, ft1
; RV32IF-NEXT: fmv.x.w a0, ft0
; RV32IF-NEXT: ret
;
; RV64IF-NEXT: fmv.w.x ft3, zero
; RV64IF-NEXT: fadd.s ft2, ft2, ft3
; RV64IF-NEXT: fadd.s ft1, ft1, ft3
-; RV64IF-NEXT: fnmadd.s ft0, ft0, ft2, ft1
+; RV64IF-NEXT: fnmadd.s ft0, ft2, ft0, ft1
; RV64IF-NEXT: fmv.x.w a0, ft0
; RV64IF-NEXT: ret
%b_ = fadd float 0.0, %b
; RV32IF-NEXT: fmv.w.x ft2, a1
; RV32IF-NEXT: fmv.w.x ft3, zero
; RV32IF-NEXT: fadd.s ft2, ft2, ft3
-; RV32IF-NEXT: fnmsub.s ft0, ft1, ft2, ft0
+; RV32IF-NEXT: fnmsub.s ft0, ft2, ft1, ft0
; RV32IF-NEXT: fmv.x.w a0, ft0
; RV32IF-NEXT: ret
;
; RV64IF-NEXT: fmv.w.x ft2, a1
; RV64IF-NEXT: fmv.w.x ft3, zero
; RV64IF-NEXT: fadd.s ft2, ft2, ft3
-; RV64IF-NEXT: fnmsub.s ft0, ft1, ft2, ft0
+; RV64IF-NEXT: fnmsub.s ft0, ft2, ft1, ft0
; RV64IF-NEXT: fmv.x.w a0, ft0
; RV64IF-NEXT: ret
%b_ = fadd float 0.0, %b
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vldrw.u32 q1, [r1], #16
; CHECK-NEXT: vldrw.u32 q2, [r0], #16
-; CHECK-NEXT: vfms.f32 q2, q0, q1
+; CHECK-NEXT: vfms.f32 q2, q1, q0
; CHECK-NEXT: vstrb.8 q2, [r3], #16
; CHECK-NEXT: le lr, .LBB18_3
; CHECK-NEXT: @ %bb.4: @ %while.end
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: vmov q3, q0
-; CHECK-NEXT: vldrw.u32 q1, [r1], #16
-; CHECK-NEXT: vldrw.u32 q2, [r0], #16
+; CHECK-NEXT: vldrw.u32 q1, [r0], #16
+; CHECK-NEXT: vldrw.u32 q2, [r1], #16
; CHECK-NEXT: vfms.f32 q3, q2, q1
; CHECK-NEXT: vstrw.32 q3, [r2], #16
; CHECK-NEXT: letp lr, .LBB6_2
define arm_aapcs_vfpcc <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vfmsq_f16:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vfms.f16 q0, q1, q2
+; CHECK-NEXT: vfms.f16 q0, q2, q1
; CHECK-NEXT: bx lr
entry:
%0 = fneg <8 x half> %c
define arm_aapcs_vfpcc <4 x float> @test_vfmsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_vfmsq_f32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vfms.f32 q0, q1, q2
+; CHECK-NEXT: vfms.f32 q0, q2, q1
; CHECK-NEXT: bx lr
entry:
%0 = fneg <4 x float> %c