// Choose the SSE instruction sequence based on data type (float or double).
+ // Row 0 holds the single-precision opcodes (SS compare, PS logical ops) and
+ // row 1 the double-precision ones (SD compare, PD logical ops). Columns are,
+ // in order: compare, and, and-not, or. The logical columns name the packed
+ // instructions, replacing the Fs*-prefixed opcodes listed previously.
static const uint16_t OpcTable[2][4] = {
- { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
- { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }
+ { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
+ { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
};
const uint16_t *Opc = nullptr;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
} else {
+ const TargetRegisterClass *VR128 = &X86::VR128RegClass;
unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
- unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
LHSReg, LHSIsKill);
- unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
RHSReg, RHSIsKill);
- ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
- AndReg, /*IsKill=*/true);
+ unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
+ AndReg, /*IsKill=*/true);
+ ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
}
updateValueMap(I, ResultReg);
return true;
{ X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
{ X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
{ X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
- { X86::FsANDNPSrr, X86::FsANDNPDrr,X86::PANDNrr },
{ X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
{ X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
- { X86::FsANDPSrr, X86::FsANDPDrr, X86::PANDrr },
{ X86::ORPSrm, X86::ORPDrm, X86::PORrm },
{ X86::ORPSrr, X86::ORPDrr, X86::PORrr },
- { X86::FsORPSrr, X86::FsORPDrr, X86::PORrr },
{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
- { X86::FsXORPSrr, X86::FsXORPDrr, X86::PXORrr },
// AVX 128-bit support
{ X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
{ X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
{ X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
{ X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
{ X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
- { X86::VFsANDNPSrr,X86::VFsANDNPDrr,X86::VPANDNrr },
{ X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
{ X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
- { X86::VFsANDPSrr, X86::VFsANDPDrr, X86::VPANDrr },
{ X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
{ X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
- { X86::VFsORPSrr, X86::VFsORPDrr, X86::VPORrr },
{ X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
{ X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
- { X86::VFsXORPSrr, X86::VFsXORPDrr, X86::VPXORrr },
// AVX 256-bit support
{ X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
{ X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
// SSE 1 & 2 - Logical Instructions
//===----------------------------------------------------------------------===//
-// Multiclass for scalars using the X86 logical operation aliases for FP.
-multiclass sse12_fp_packed_scalar_logical_alias<
- bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> {
- defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- FR32, f32, f128mem, loadf32_128, SSEPackedSingle, itins, 0>,
- PS, VEX_4V;
-
- defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- FR64, f64, f128mem, loadf64_128, SSEPackedDouble, itins, 0>,
- PD, VEX_4V;
-
- let Constraints = "$src1 = $dst" in {
- defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
- f32, f128mem, memopfsf32_128, SSEPackedSingle, itins>, PS;
-
- defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
- f64, f128mem, memopfsf64_128, SSEPackedDouble, itins>, PD;
- }
-}
-
-let isCodeGenOnly = 1 in {
- defm FsAND : sse12_fp_packed_scalar_logical_alias<0x54, "and", X86fand,
- SSE_BIT_ITINS_P>;
- defm FsOR : sse12_fp_packed_scalar_logical_alias<0x56, "or", X86for,
- SSE_BIT_ITINS_P>;
- defm FsXOR : sse12_fp_packed_scalar_logical_alias<0x57, "xor", X86fxor,
- SSE_BIT_ITINS_P>;
-
- let isCommutable = 0 in
- defm FsANDN : sse12_fp_packed_scalar_logical_alias<0x55, "andn", X86fandn,
- SSE_BIT_ITINS_P>;
-}
-
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
(VANDNPDYrm VR256:$src1, addr:$src2)>;
}
+let Predicates = [HasAVX] in {
+ // Use packed logical operations for scalar ops.
+ // Each pattern below matches a scalar X86fand/X86for/X86fxor/X86fandn node,
+ // copies both operands into VR128 with COPY_TO_REGCLASS, applies the
+ // V-prefixed packed register-register instruction, and copies the result
+ // back to the scalar register class.
+ // f64 patterns: FR64 operands, PD instructions.
+ def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (VANDPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (VORPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (VXORPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (VANDNPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+
+ // f32 patterns: FR32 operands, PS instructions.
+ def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (VANDPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (VORPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (VXORPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (VANDNPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+}
+
let Predicates = [UseSSE1] in {
def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
(ANDPSrr VR128:$src1, VR128:$src2)>;
(XORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
(ANDNPSrm VR128:$src1, addr:$src2)>;
+
+ // Use packed logical operations for scalar ops.
+ // Each pattern matches a scalar f32 node on FR32 operands, copies both
+ // operands into VR128 with COPY_TO_REGCLASS, applies the packed PS
+ // register-register instruction, and copies the result back to FR32.
+ def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (ANDPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (ORPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (XORPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (ANDNPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
}
let Predicates = [UseSSE2] in {
(XORPDrm VR128:$src1, addr:$src2)>;
def : Pat<(X86fandn VR128:$src1, (memopv2f64 addr:$src2)),
(ANDNPDrm VR128:$src1, addr:$src2)>;
+
+ // Use packed logical operations for scalar ops.
+ // Each pattern matches a scalar f64 node on FR64 operands, copies both
+ // operands into VR128 with COPY_TO_REGCLASS, applies the packed PD
+ // register-register instruction, and copies the result back to FR64.
+ def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (ANDPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (ORPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (XORPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (ANDNPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
}
//===----------------------------------------------------------------------===//