We already need to have patterns for X86ISD::RNDSCALE to support software intrinsics. But we currently have 5 sets of patterns for the 5 rounding operations. For each of these 5 patterns we have to support 3 vector widths, 2 element sizes, sse/vex/evex encodings, load folding, and broadcast load folding. This results in a fair amount of bytes in the isel table.
This patch adds code to PreProcessIselDAG to morph the fceil/ffloor/ftrunc/fnearbyint/frint to X86ISD::RNDSCALE. This way we can remove everything but the intrinsic pattern while still allowing the operations to be considered Legal for DAGCombine and Legalization. This shrinks the DAGISel table by somewhere between 9K and 10K.
There is one complication to this: the STRICT versions of these nodes are currently mutated to their non-strict equivalents at isel time when the node is visited. This won't be true in the future since that loses the chain ordering information. For now I've also added support for the non-STRICT nodes to Select so we can change the STRICT versions there after they've been mutated to their non-STRICT versions. We'll probably need a STRICT version of RNDSCALE or something to handle this in the future, which will take us back to needing 2 sets of patterns for strict and non-strict, but that's still better than the 11 or 12 sets of patterns we'd need.
We can probably do something similar for scalar, but I haven't looked at it yet.
Differential Revision: https://reviews.llvm.org/D62757
llvm-svn: 362535
continue;
}
- // Replace vector shifts with their X86 specific equivalent so we don't
- // need 2 sets of patterns.
switch (N->getOpcode()) {
case ISD::SHL:
case ISD::SRA:
- case ISD::SRL:
- if (N->getValueType(0).isVector()) {
- unsigned NewOpc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected opcode!");
- case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
- case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
- case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
- }
- SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
- N->getOperand(0), N->getOperand(1));
- --I;
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
- ++I;
- CurDAG->DeleteNode(N);
- continue;
+ case ISD::SRL: {
+ // Replace vector shifts with their X86 specific equivalent so we don't
+ // need 2 sets of patterns.
+ if (!N->getValueType(0).isVector())
+ break;
+
+ unsigned NewOpc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
+ case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
+ case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
+ }
+ SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1));
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ ++I;
+ CurDAG->DeleteNode(N);
+ continue;
+ }
+ case ISD::FCEIL:
+ case ISD::FFLOOR:
+ case ISD::FTRUNC:
+ case ISD::FNEARBYINT:
+ case ISD::FRINT: {
+ // Replace vector rounding with their X86 specific equivalent so we don't
+ // need 2 sets of patterns.
+ if (!N->getValueType(0).isVector())
+ break;
+
+ unsigned Imm;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::FCEIL: Imm = 0xA; break;
+ case ISD::FFLOOR: Imm = 0x9; break;
+ case ISD::FTRUNC: Imm = 0xB; break;
+ case ISD::FNEARBYINT: Imm = 0xC; break;
+ case ISD::FRINT: Imm = 0x4; break;
}
+ SDLoc dl(N);
+ SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
+ N->getValueType(0),
+ N->getOperand(0),
+ CurDAG->getConstant(Imm, dl, MVT::i8));
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+ ++I;
+ CurDAG->DeleteNode(N);
+ continue;
+ }
}
if (OptLevel != CodeGenOpt::None &&
if (foldLoadStoreIntoMemOperand(Node))
return;
break;
+ case ISD::FCEIL:
+ case ISD::FFLOOR:
+ case ISD::FTRUNC:
+ case ISD::FNEARBYINT:
+ case ISD::FRINT: {
+ // Replace vector rounding with their X86 specific equivalent so we don't
+ // need 2 sets of patterns.
+ // FIXME: This can only happen when the nodes started as STRICT_* and have
+ // been mutated into their non-STRICT equivalents. Eventually this
+ // mutation will be removed and we should switch the STRICT_ nodes to a
+ // strict version of RNDSCALE in PreProcessISelDAG.
+ if (!Node->getValueType(0).isVector())
+ break;
+
+ unsigned Imm;
+ switch (Node->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case ISD::FCEIL: Imm = 0xA; break;
+ case ISD::FFLOOR: Imm = 0x9; break;
+ case ISD::FTRUNC: Imm = 0xB; break;
+ case ISD::FNEARBYINT: Imm = 0xC; break;
+ case ISD::FRINT: Imm = 0x4; break;
+ }
+ SDLoc dl(Node);
+ SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
+ Node->getValueType(0),
+ Node->getOperand(0),
+ CurDAG->getConstant(Imm, dl, MVT::i8));
+ ReplaceNode(Node, Res.getNode());
+ SelectCode(Res.getNode());
+ return;
+ }
}
SelectCode(Node);
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
-
-multiclass AVX512_rndscale_lowering<X86VectorVTInfo _, string Suffix> {
- // Register
- def : Pat<(_.VT (ffloor _.RC:$src)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
- _.RC:$src, (i32 0x9))>;
- def : Pat<(_.VT (fnearbyint _.RC:$src)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
- _.RC:$src, (i32 0xC))>;
- def : Pat<(_.VT (fceil _.RC:$src)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
- _.RC:$src, (i32 0xA))>;
- def : Pat<(_.VT (frint _.RC:$src)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
- _.RC:$src, (i32 0x4))>;
- def : Pat<(_.VT (ftrunc _.RC:$src)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
- _.RC:$src, (i32 0xB))>;
-
- // Merge-masking
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
- _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
- _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
- _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
- _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
- _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
-
- // Zero-masking
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
- _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
- _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
- _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
- _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
- _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
-
- // Load
- def : Pat<(_.VT (ffloor (_.LdFrag addr:$src))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
- addr:$src, (i32 0x9))>;
- def : Pat<(_.VT (fnearbyint (_.LdFrag addr:$src))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
- addr:$src, (i32 0xC))>;
- def : Pat<(_.VT (fceil (_.LdFrag addr:$src))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
- addr:$src, (i32 0xA))>;
- def : Pat<(_.VT (frint (_.LdFrag addr:$src))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
- addr:$src, (i32 0x4))>;
- def : Pat<(_.VT (ftrunc (_.LdFrag addr:$src))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
- addr:$src, (i32 0xB))>;
-
- // Merge-masking + load
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-
- // Zero-masking + load
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
- _.KRCWM:$mask, addr:$src, (i32 0x9))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
- _.KRCWM:$mask, addr:$src, (i32 0xC))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
- _.KRCWM:$mask, addr:$src, (i32 0xA))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
- _.KRCWM:$mask, addr:$src, (i32 0x4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
- _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-
- // Broadcast load
- def : Pat<(_.VT (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
- addr:$src, (i32 0x9))>;
- def : Pat<(_.VT (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
- addr:$src, (i32 0xC))>;
- def : Pat<(_.VT (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
- addr:$src, (i32 0xA))>;
- def : Pat<(_.VT (frint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
- addr:$src, (i32 0x4))>;
- def : Pat<(_.VT (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
- addr:$src, (i32 0xB))>;
-
- // Merge-masking + broadcast load
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.RC:$dst)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
- _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-
- // Zero-masking + broadcast load
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
- _.KRCWM:$mask, addr:$src, (i32 0x9))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
- _.KRCWM:$mask, addr:$src, (i32 0xC))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
- _.KRCWM:$mask, addr:$src, (i32 0xA))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
- _.KRCWM:$mask, addr:$src, (i32 0x4))>;
- def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
- _.ImmAllZerosV)),
- (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
- _.KRCWM:$mask, addr:$src, (i32 0xB))>;
-}
-
-let Predicates = [HasAVX512] in {
- defm : AVX512_rndscale_lowering<v16f32_info, "PS">;
- defm : AVX512_rndscale_lowering<v8f64_info, "PD">;
-}
-
-let Predicates = [HasVLX] in {
- defm : AVX512_rndscale_lowering<v8f32x_info, "PS">;
- defm : AVX512_rndscale_lowering<v4f64x_info, "PD">;
- defm : AVX512_rndscale_lowering<v4f32x_info, "PS">;
- defm : AVX512_rndscale_lowering<v2f64x_info, "PD">;
-}
-
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
X86VectorVTInfo _,
(VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src, (i32 0xB))>;
}
-let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4f32 (ffloor VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0x9))>;
- def : Pat<(v4f32 (fnearbyint VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0xC))>;
- def : Pat<(v4f32 (fceil VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0xA))>;
- def : Pat<(v4f32 (frint VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0x4))>;
- def : Pat<(v4f32 (ftrunc VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0xB))>;
-
- def : Pat<(v4f32 (ffloor (loadv4f32 addr:$src))),
- (VROUNDPSm addr:$src, (i32 0x9))>;
- def : Pat<(v4f32 (fnearbyint (loadv4f32 addr:$src))),
- (VROUNDPSm addr:$src, (i32 0xC))>;
- def : Pat<(v4f32 (fceil (loadv4f32 addr:$src))),
- (VROUNDPSm addr:$src, (i32 0xA))>;
- def : Pat<(v4f32 (frint (loadv4f32 addr:$src))),
- (VROUNDPSm addr:$src, (i32 0x4))>;
- def : Pat<(v4f32 (ftrunc (loadv4f32 addr:$src))),
- (VROUNDPSm addr:$src, (i32 0xB))>;
-
- def : Pat<(v2f64 (ffloor VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0x9))>;
- def : Pat<(v2f64 (fnearbyint VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0xC))>;
- def : Pat<(v2f64 (fceil VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0xA))>;
- def : Pat<(v2f64 (frint VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0x4))>;
- def : Pat<(v2f64 (ftrunc VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0xB))>;
-
- def : Pat<(v2f64 (ffloor (loadv2f64 addr:$src))),
- (VROUNDPDm addr:$src, (i32 0x9))>;
- def : Pat<(v2f64 (fnearbyint (loadv2f64 addr:$src))),
- (VROUNDPDm addr:$src, (i32 0xC))>;
- def : Pat<(v2f64 (fceil (loadv2f64 addr:$src))),
- (VROUNDPDm addr:$src, (i32 0xA))>;
- def : Pat<(v2f64 (frint (loadv2f64 addr:$src))),
- (VROUNDPDm addr:$src, (i32 0x4))>;
- def : Pat<(v2f64 (ftrunc (loadv2f64 addr:$src))),
- (VROUNDPDm addr:$src, (i32 0xB))>;
-
- def : Pat<(v8f32 (ffloor VR256:$src)),
- (VROUNDPSYr VR256:$src, (i32 0x9))>;
- def : Pat<(v8f32 (fnearbyint VR256:$src)),
- (VROUNDPSYr VR256:$src, (i32 0xC))>;
- def : Pat<(v8f32 (fceil VR256:$src)),
- (VROUNDPSYr VR256:$src, (i32 0xA))>;
- def : Pat<(v8f32 (frint VR256:$src)),
- (VROUNDPSYr VR256:$src, (i32 0x4))>;
- def : Pat<(v8f32 (ftrunc VR256:$src)),
- (VROUNDPSYr VR256:$src, (i32 0xB))>;
-
- def : Pat<(v8f32 (ffloor (loadv8f32 addr:$src))),
- (VROUNDPSYm addr:$src, (i32 0x9))>;
- def : Pat<(v8f32 (fnearbyint (loadv8f32 addr:$src))),
- (VROUNDPSYm addr:$src, (i32 0xC))>;
- def : Pat<(v8f32 (fceil (loadv8f32 addr:$src))),
- (VROUNDPSYm addr:$src, (i32 0xA))>;
- def : Pat<(v8f32 (frint (loadv8f32 addr:$src))),
- (VROUNDPSYm addr:$src, (i32 0x4))>;
- def : Pat<(v8f32 (ftrunc (loadv8f32 addr:$src))),
- (VROUNDPSYm addr:$src, (i32 0xB))>;
-
- def : Pat<(v4f64 (ffloor VR256:$src)),
- (VROUNDPDYr VR256:$src, (i32 0x9))>;
- def : Pat<(v4f64 (fnearbyint VR256:$src)),
- (VROUNDPDYr VR256:$src, (i32 0xC))>;
- def : Pat<(v4f64 (fceil VR256:$src)),
- (VROUNDPDYr VR256:$src, (i32 0xA))>;
- def : Pat<(v4f64 (frint VR256:$src)),
- (VROUNDPDYr VR256:$src, (i32 0x4))>;
- def : Pat<(v4f64 (ftrunc VR256:$src)),
- (VROUNDPDYr VR256:$src, (i32 0xB))>;
-
- def : Pat<(v4f64 (ffloor (loadv4f64 addr:$src))),
- (VROUNDPDYm addr:$src, (i32 0x9))>;
- def : Pat<(v4f64 (fnearbyint (loadv4f64 addr:$src))),
- (VROUNDPDYm addr:$src, (i32 0xC))>;
- def : Pat<(v4f64 (fceil (loadv4f64 addr:$src))),
- (VROUNDPDYm addr:$src, (i32 0xA))>;
- def : Pat<(v4f64 (frint (loadv4f64 addr:$src))),
- (VROUNDPDYm addr:$src, (i32 0x4))>;
- def : Pat<(v4f64 (ftrunc (loadv4f64 addr:$src))),
- (VROUNDPDYm addr:$src, (i32 0xB))>;
-}
-
let ExeDomain = SSEPackedSingle in
defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32,
memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>;
(ROUNDSDm addr:$src, (i32 0xB))>;
}
-let Predicates = [UseSSE41] in {
- def : Pat<(v4f32 (ffloor VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0x9))>;
- def : Pat<(v4f32 (fnearbyint VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0xC))>;
- def : Pat<(v4f32 (fceil VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0xA))>;
- def : Pat<(v4f32 (frint VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0x4))>;
- def : Pat<(v4f32 (ftrunc VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0xB))>;
-
- def : Pat<(v4f32 (ffloor (memopv4f32 addr:$src))),
- (ROUNDPSm addr:$src, (i32 0x9))>;
- def : Pat<(v4f32 (fnearbyint (memopv4f32 addr:$src))),
- (ROUNDPSm addr:$src, (i32 0xC))>;
- def : Pat<(v4f32 (fceil (memopv4f32 addr:$src))),
- (ROUNDPSm addr:$src, (i32 0xA))>;
- def : Pat<(v4f32 (frint (memopv4f32 addr:$src))),
- (ROUNDPSm addr:$src, (i32 0x4))>;
- def : Pat<(v4f32 (ftrunc (memopv4f32 addr:$src))),
- (ROUNDPSm addr:$src, (i32 0xB))>;
-
- def : Pat<(v2f64 (ffloor VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0x9))>;
- def : Pat<(v2f64 (fnearbyint VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0xC))>;
- def : Pat<(v2f64 (fceil VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0xA))>;
- def : Pat<(v2f64 (frint VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0x4))>;
- def : Pat<(v2f64 (ftrunc VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0xB))>;
-
- def : Pat<(v2f64 (ffloor (memopv2f64 addr:$src))),
- (ROUNDPDm addr:$src, (i32 0x9))>;
- def : Pat<(v2f64 (fnearbyint (memopv2f64 addr:$src))),
- (ROUNDPDm addr:$src, (i32 0xC))>;
- def : Pat<(v2f64 (fceil (memopv2f64 addr:$src))),
- (ROUNDPDm addr:$src, (i32 0xA))>;
- def : Pat<(v2f64 (frint (memopv2f64 addr:$src))),
- (ROUNDPDm addr:$src, (i32 0x4))>;
- def : Pat<(v2f64 (ftrunc (memopv2f64 addr:$src))),
- (ROUNDPDm addr:$src, (i32 0xB))>;
-}
-
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//