From 9bd6241106c4408358d77770e025d6f9adf4a410 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 9 Aug 2016 03:06:33 +0000
Subject: [PATCH] [X86] Remove the Fv packed logical operation alias
 instructions. Replace them with patterns to the regular instructions.

This enables execution domain fixing which is why the tests changed.

llvm-svn: 278090
---
 llvm/lib/Target/X86/X86InstrInfo.cpp               |  24 ----
 llvm/lib/Target/X86/X86InstrSSE.td                 | 159 +++++++++++++++------
 .../CodeGen/X86/copysign-constant-magnitude.ll     |  14 +-
 llvm/test/CodeGen/X86/fp-logic.ll                  |   2 +-
 llvm/test/CodeGen/X86/fp128-cast.ll                |   4 +-
 llvm/test/CodeGen/X86/sse-fcopysign.ll             |   6 +-
 llvm/test/CodeGen/X86/vec_fabs.ll                  |  12 +-
 7 files changed, 133 insertions(+), 88 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 57276fb..f3d7c30 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -975,19 +975,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::DIVSSrr_Int,     X86::DIVSSrm_Int,   0 },
     { X86::DPPDrri,         X86::DPPDrmi,       TB_ALIGN_16 },
     { X86::DPPSrri,         X86::DPPSrmi,       TB_ALIGN_16 },
-
-    // Do not fold Fs* scalar logical op loads because there are no scalar
-    // load variants for these instructions. When folded, the load is required
-    // to be 128-bits, so the load size would not match.
-
-    { X86::FvANDNPDrr,      X86::FvANDNPDrm,    TB_ALIGN_16 },
-    { X86::FvANDNPSrr,      X86::FvANDNPSrm,    TB_ALIGN_16 },
-    { X86::FvANDPDrr,       X86::FvANDPDrm,     TB_ALIGN_16 },
-    { X86::FvANDPSrr,       X86::FvANDPSrm,     TB_ALIGN_16 },
-    { X86::FvORPDrr,        X86::FvORPDrm,      TB_ALIGN_16 },
-    { X86::FvORPSrr,        X86::FvORPSrm,      TB_ALIGN_16 },
-    { X86::FvXORPDrr,       X86::FvXORPDrm,     TB_ALIGN_16 },
-    { X86::FvXORPSrr,       X86::FvXORPSrm,     TB_ALIGN_16 },
     { X86::HADDPDrr,        X86::HADDPDrm,      TB_ALIGN_16 },
     { X86::HADDPSrr,        X86::HADDPSrm,      TB_ALIGN_16 },
     { X86::HSUBPDrr,        X86::HSUBPDrm,      TB_ALIGN_16 },
@@ -1295,17 +1282,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VDIVSSrr_Int,      X86::VDIVSSrm_Int,       0 },
     { X86::VDPPDrri,          X86::VDPPDrmi,           0 },
     { X86::VDPPSrri,          X86::VDPPSrmi,           0 },
-    // Do not fold VFs* loads because there are no scalar load variants for
-    // these instructions. When folded, the load is required to be 128-bits, so
-    // the load size would not match.
-    { X86::VFvANDNPDrr,       X86::VFvANDNPDrm,        0 },
-    { X86::VFvANDNPSrr,       X86::VFvANDNPSrm,        0 },
-    { X86::VFvANDPDrr,        X86::VFvANDPDrm,         0 },
-    { X86::VFvANDPSrr,        X86::VFvANDPSrm,         0 },
-    { X86::VFvORPDrr,         X86::VFvORPDrm,          0 },
-    { X86::VFvORPSrr,         X86::VFvORPSrm,          0 },
-    { X86::VFvXORPDrr,        X86::VFvXORPDrm,         0 },
-    { X86::VFvXORPSrr,        X86::VFvXORPSrm,         0 },
     { X86::VHADDPDrr,         X86::VHADDPDrm,          0 },
     { X86::VHADDPSrr,         X86::VHADDPSrm,          0 },
     { X86::VHSUBPDrr,         X86::VHSUBPDrm,          0 },
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index fa2ee40..3ee1c1e 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2845,51 +2845,6 @@ let isCodeGenOnly = 1 in {
                                                 SSE_BIT_ITINS_P>;
 }
 
-// Multiclass for vectors using the X86 logical operation aliases for FP.
-multiclass sse12_fp_packed_vector_logical_alias<
-    bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> {
-  let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
-  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
-              VR128, v4f32, f128mem, loadv4f32, SSEPackedSingle, itins, 0>,
-              PS, VEX_4V;
-
-  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
-              VR128, v2f64, f128mem, loadv2f64, SSEPackedDouble, itins, 0>,
-              PD, VEX_4V;
-
-  defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
-              VR256, v8f32, f256mem, loadv8f32, SSEPackedSingle, itins, 0>,
-              PS, VEX_4V, VEX_L;
-
-  defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
-              VR256, v4f64, f256mem, loadv4f64, SSEPackedDouble, itins, 0>,
-              PD, VEX_4V, VEX_L;
-  }
-
-  let Constraints = "$src1 = $dst" in {
-    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
-                v4f32, f128mem, memopv4f32, SSEPackedSingle, itins>,
-                PS;
-
-    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
-                v2f64, f128mem, memopv2f64, SSEPackedDouble, itins>,
-                PD;
-  }
-}
-
-let isCodeGenOnly = 1 in {
-  defm FvAND  : sse12_fp_packed_vector_logical_alias<0x54, "and", X86fand,
-                                                     SSE_BIT_ITINS_P>;
-  defm FvOR   : sse12_fp_packed_vector_logical_alias<0x56, "or", X86for,
-                                                     SSE_BIT_ITINS_P>;
-  defm FvXOR  : sse12_fp_packed_vector_logical_alias<0x57, "xor", X86fxor,
-                                                     SSE_BIT_ITINS_P>;
-
-  let isCommutable = 0 in
-    defm FvANDN : sse12_fp_packed_vector_logical_alias<0x55, "andn", X86fandn,
-                                                       SSE_BIT_ITINS_P>;
-}
-
 /// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
 ///
 multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
@@ -2971,6 +2926,120 @@ let Predicates = [HasAVX1Only] in {
             (VANDNPSYrm VR256:$src1, addr:$src2)>;
 }
 
+let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
+  def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
+            (VANDPSrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
+            (VORPSrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
+            (VXORPSrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
+            (VANDNPSrr VR128:$src1, VR128:$src2)>;
+
+  def : Pat<(X86fand VR128:$src1, (loadv4f32 addr:$src2)),
+            (VANDPSrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86for VR128:$src1, (loadv4f32 addr:$src2)),
+            (VORPSrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86fxor VR128:$src1, (loadv4f32 addr:$src2)),
+            (VXORPSrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86fandn VR128:$src1, (loadv4f32 addr:$src2)),
+            (VANDNPSrm VR128:$src1, addr:$src2)>;
+
+  def : Pat<(v2f64 (X86fand VR128:$src1, VR128:$src2)),
+            (VANDPDrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v2f64 (X86for VR128:$src1, VR128:$src2)),
+            (VORPDrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v2f64 (X86fxor VR128:$src1, VR128:$src2)),
+            (VXORPDrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v2f64 (X86fandn VR128:$src1, VR128:$src2)),
+            (VANDNPDrr VR128:$src1, VR128:$src2)>;
+
+  def : Pat<(X86fand VR128:$src1, (loadv2f64 addr:$src2)),
+            (VANDPDrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86for VR128:$src1, (loadv2f64 addr:$src2)),
+            (VORPDrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86fxor VR128:$src1, (loadv2f64 addr:$src2)),
+            (VXORPDrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86fandn VR128:$src1, (loadv2f64 addr:$src2)),
+            (VANDNPDrm VR128:$src1, addr:$src2)>;
+
+  def : Pat<(v8f32 (X86fand VR256:$src1, VR256:$src2)),
+            (VANDPSYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v8f32 (X86for VR256:$src1, VR256:$src2)),
+            (VORPSYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v8f32 (X86fxor VR256:$src1, VR256:$src2)),
+            (VXORPSYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v8f32 (X86fandn VR256:$src1, VR256:$src2)),
+            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
+
+  def : Pat<(X86fand VR256:$src1, (loadv8f32 addr:$src2)),
+            (VANDPSYrm VR256:$src1, addr:$src2)>;
+  def : Pat<(X86for VR256:$src1, (loadv8f32 addr:$src2)),
+            (VORPSYrm VR256:$src1, addr:$src2)>;
+  def : Pat<(X86fxor VR256:$src1, (loadv8f32 addr:$src2)),
+            (VXORPSYrm VR256:$src1, addr:$src2)>;
+  def : Pat<(X86fandn VR256:$src1, (loadv8f32 addr:$src2)),
+            (VANDNPSYrm VR256:$src1, addr:$src2)>;
+
+  def : Pat<(v4f64 (X86fand VR256:$src1, VR256:$src2)),
+            (VANDPDYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v4f64 (X86for VR256:$src1, VR256:$src2)),
+            (VORPDYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v4f64 (X86fxor VR256:$src1, VR256:$src2)),
+            (VXORPDYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v4f64 (X86fandn VR256:$src1, VR256:$src2)),
+            (VANDNPDYrr VR256:$src1, VR256:$src2)>;
+
+  def : Pat<(X86fand VR256:$src1, (loadv4f64 addr:$src2)),
+            (VANDPDYrm VR256:$src1, addr:$src2)>;
+  def : Pat<(X86for VR256:$src1, (loadv4f64 addr:$src2)),
+            (VORPDYrm VR256:$src1, addr:$src2)>;
+  def : Pat<(X86fxor VR256:$src1, (loadv4f64 addr:$src2)),
+            (VXORPDYrm VR256:$src1, addr:$src2)>;
+  def : Pat<(X86fandn VR256:$src1, (loadv4f64 addr:$src2)),
+            (VANDNPDYrm VR256:$src1, addr:$src2)>;
+}
+
+let Predicates = [UseSSE1] in {
+  def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
+            (ANDPSrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
+            (ORPSrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
+            (XORPSrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
+            (ANDNPSrr VR128:$src1, VR128:$src2)>;
+
+  def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
+            (ANDPSrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
+            (ORPSrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
+            (XORPSrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
+            (ANDNPSrm VR128:$src1, addr:$src2)>;
+}
+
+let Predicates = [UseSSE2] in {
+  def : Pat<(v2f64 (X86fand VR128:$src1, VR128:$src2)),
+            (ANDPDrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v2f64 (X86for VR128:$src1, VR128:$src2)),
+            (ORPDrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v2f64 (X86fxor VR128:$src1, VR128:$src2)),
+            (XORPDrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v2f64 (X86fandn VR128:$src1, VR128:$src2)),
+            (ANDNPDrr VR128:$src1, VR128:$src2)>;
+
+  def : Pat<(X86fand VR128:$src1, (memopv2f64 addr:$src2)),
+            (ANDPDrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86for VR128:$src1, (memopv2f64 addr:$src2)),
+            (ORPDrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86fxor VR128:$src1, (memopv2f64 addr:$src2)),
+            (XORPDrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86fandn VR128:$src1, (memopv2f64 addr:$src2)),
+            (ANDNPDrm VR128:$src1, addr:$src2)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Arithmetic Instructions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll b/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll
index 6c577a2..539061b 100644
--- a/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll
+++ b/llvm/test/CodeGen/X86/copysign-constant-magnitude.ll
@@ -17,28 +17,28 @@ define void @test_copysign_const_magnitude_d(double %X) {
 ; CHECK: id
   %iX = call double @id_d(double %X)
 
-; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0
   %d0 = call double @copysign(double 0.000000e+00, double %iX)
 
 ; CHECK-NEXT: id
   %id0 = call double @id_d(double %d0)
 
-; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0
-; CHECK-NEXT: orpd [[ZERO]](%rip), %xmm0
+; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: orps [[ZERO]](%rip), %xmm0
   %dn0 = call double @copysign(double -0.000000e+00, double %id0)
 
 ; CHECK-NEXT: id
   %idn0 = call double @id_d(double %dn0)
 
-; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0
-; CHECK-NEXT: orpd [[ONE]](%rip), %xmm0
+; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: orps [[ONE]](%rip), %xmm0
   %d1 = call double @copysign(double 1.000000e+00, double %idn0)
 
 ; CHECK-NEXT: id
   %id1 = call double @id_d(double %d1)
 
-; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0
-; CHECK-NEXT: orpd [[ONE]](%rip), %xmm0
+; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: orps [[ONE]](%rip), %xmm0
   %dn1 = call double @copysign(double -1.000000e+00, double %id1)
 
 ; CHECK-NEXT: id
diff --git a/llvm/test/CodeGen/X86/fp-logic.ll b/llvm/test/CodeGen/X86/fp-logic.ll
index 9ab6751..ce60d80 100644
--- a/llvm/test/CodeGen/X86/fp-logic.ll
+++ b/llvm/test/CodeGen/X86/fp-logic.ll
@@ -265,7 +265,7 @@ define float @movmsk(float %x) {
 define double @bitcast_fabs(double %x) {
 ; CHECK-LABEL: bitcast_fabs:
 ; CHECK: # BB#0:
-; CHECK-NEXT: andpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
 ;
   %bc1 = bitcast double %x to i64
diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll
index 2d87249..5bae3cb 100644
--- a/llvm/test/CodeGen/X86/fp128-cast.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast.ll
@@ -351,8 +351,8 @@ cleanup:                                          ; preds = %entry, %if.then
 ;
 ; X64-LABEL: TestTruncCopysign:
 ; X64: callq __trunctfdf2
-; X64-NEXT: andpd {{.*}}, %xmm0
-; X64-NEXT: orpd {{.*}}, %xmm0
+; X64-NEXT: andps {{.*}}, %xmm0
+; X64-NEXT: orps {{.*}}, %xmm0
 ; X64-NEXT: callq __extenddftf2
 ; X64: retq
 }
diff --git a/llvm/test/CodeGen/X86/sse-fcopysign.ll b/llvm/test/CodeGen/X86/sse-fcopysign.ll
index b01bbac..b84a589 100644
--- a/llvm/test/CodeGen/X86/sse-fcopysign.ll
+++ b/llvm/test/CodeGen/X86/sse-fcopysign.ll
@@ -108,9 +108,9 @@ define double @int2(double %a, float %b, float %c) nounwind {
 ; X64: # BB#0:
 ; X64-NEXT: addss %xmm2, %xmm1
 ; X64-NEXT: cvtss2sd %xmm1, %xmm1
-; X64-NEXT: andpd {{.*}}(%rip), %xmm1
-; X64-NEXT: andpd {{.*}}(%rip), %xmm0
-; X64-NEXT: orpd %xmm1, %xmm0
+; X64-NEXT: andps {{.*}}(%rip), %xmm1
+; X64-NEXT: andps {{.*}}(%rip), %xmm0
+; X64-NEXT: orps %xmm1, %xmm0
 ; X64-NEXT: retq
   %tmp1 = fadd float %b, %c
   %tmp2 = fpext float %tmp1 to double
diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll
index 0f5e099..768c5ec 100644
--- a/llvm/test/CodeGen/X86/vec_fabs.ll
+++ b/llvm/test/CodeGen/X86/vec_fabs.ll
@@ -7,12 +7,12 @@
 define <2 x double> @fabs_v2f64(<2 x double> %p) {
 ; X32-LABEL: fabs_v2f64:
 ; X32: # BB#0:
-; X32-NEXT: vandpd .LCPI0_0, %xmm0, %xmm0
+; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: fabs_v2f64:
 ; X64: # BB#0:
-; X64-NEXT: vandpd {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
 ; X64-NEXT: retq
   %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
   ret <2 x double> %t
@@ -22,7 +22,7 @@ declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
 define <4 x float> @fabs_v4f32(<4 x float> %p) {
 ; X32-LABEL: fabs_v4f32:
 ; X32: # BB#0:
-; X32-NEXT: vandps .LCPI1_0, %xmm0, %xmm0
+; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: fabs_v4f32:
@@ -37,12 +37,12 @@ declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
 define <4 x double> @fabs_v4f64(<4 x double> %p) {
 ; X32-LABEL: fabs_v4f64:
 ; X32: # BB#0:
-; X32-NEXT: vandpd .LCPI2_0, %ymm0, %ymm0
+; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: fabs_v4f64:
 ; X64: # BB#0:
-; X64-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
 ; X64-NEXT: retq
   %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
   ret <4 x double> %t
@@ -52,7 +52,7 @@ declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
 define <8 x float> @fabs_v8f32(<8 x float> %p) {
 ; X32-LABEL: fabs_v8f32:
 ; X32: # BB#0:
-; X32-NEXT: vandps .LCPI3_0, %ymm0, %ymm0
+; X32-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: fabs_v8f32:
-- 
2.7.4
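
Why the tests changed, in more detail: an isCodeGenOnly alias such as FvANDPDrr is a
distinct MachineInstr opcode, and the execution domain fixing pass (ExecutionDepsFix at
this point in LLVM's history) only rewrites opcodes listed in X86InstrInfo's
replaceable-instruction tables. Selecting the regular packed instructions through
standalone Pat<> definitions instead makes these logical ops eligible for domain fixing.
A minimal excerpt of the idiom, taken from the UseSSE1 hunk above with explanatory
comments added:

    // Map the v4f32 FP-AND node directly onto the ordinary ANDPSrr.
    // Because this is the regular instruction rather than a codegen-only
    // Fv* alias, the domain-fixing pass may re-encode it between the
    // ps/pd/integer domains after selection.
    let Predicates = [UseSSE1] in {
      def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
                (ANDPSrr VR128:$src1, VR128:$src2)>;
    }

With nothing else constraining the domain in these tests, the pass settles on the
single-precision encodings, which is why the FileCheck lines flip from andpd/orpd/vandpd
to andps/orps/vandps.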