From 375aa90291275bf6f85f0f3ef1e82e5dce0472d1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 19 Dec 2016 00:42:28 +0000 Subject: [PATCH] [X86] Remove all of the patterns that use X86ISD:FAND/FXOR/FOR/FANDN except for the ones needed for SSE1. Anything SSE2 or above uses the integer ISD opcode. This removes 11721 bytes from the DAG isel table or 2.2% llvm-svn: 290073 --- llvm/lib/Target/X86/X86InstrAVX512.td | 44 ++++++------ llvm/lib/Target/X86/X86InstrSSE.td | 129 +++++----------------------------- 2 files changed, 42 insertions(+), 131 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 0fd95cd..ad95ce9 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4383,31 +4383,33 @@ defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; -multiclass avx512_fp_packed opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_fp_packed opc, string OpcodeStr, SDPatternOperator OpNode, X86VectorVTInfo _, OpndItins itins, bit IsCommutable> { - let ExeDomain = _.ExeDomain in { + let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { defm rr: AVX512_maskable, EVEX_4V; - defm rm: AVX512_maskable, - EVEX_4V; - defm rmb: AVX512_maskable, EVEX_4V, EVEX_B; + let mayLoad = 1 in { + defm rm: AVX512_maskable, + EVEX_4V; + defm rmb: AVX512_maskable, EVEX_4V, EVEX_B; + } } } -multiclass avx512_fp_round_packed opc, string OpcodeStr, SDNode OpNodeRnd, +multiclass avx512_fp_round_packed opc, string OpcodeStr, SDPatternOperator OpNodeRnd, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rb: AVX512_maskable opc, string OpcodeStr, SDNode OpNodeRn } -multiclass avx512_fp_sae_packed opc, string OpcodeStr, SDNode OpNodeRnd, +multiclass avx512_fp_sae_packed opc, string OpcodeStr, SDPatternOperator OpNodeRnd, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rb: AVX512_maskable opc, string OpcodeStr, SDNode OpNodeRnd, EVEX_4V, EVEX_B; } -multiclass avx512_fp_binop_p opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_fp_binop_p opc, string OpcodeStr, SDPatternOperator OpNode, Predicate prd, SizeItins itins, bit IsCommutable = 0> { let Predicates = [prd] in { @@ -4493,13 +4495,13 @@ let isCodeGenOnly = 1 in { defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, SSE_ALU_ITINS_P, 1>; } -defm VAND : avx512_fp_binop_p<0x54, "vand", X86fand, HasDQI, +defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, SSE_ALU_ITINS_P, 1>; -defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, HasDQI, +defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, SSE_ALU_ITINS_P, 0>; -defm VOR : avx512_fp_binop_p<0x56, "vor", X86for, HasDQI, +defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, SSE_ALU_ITINS_P, 1>; -defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, HasDQI, +defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, SSE_ALU_ITINS_P, 1>; // Patterns catch floating point selects with bitcasted integer logic ops. diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 0e82a1e..4e9c725 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2859,80 +2859,6 @@ let Predicates = [HasAVX1Only] in { } let Predicates = [HasAVX, NoVLX_Or_NoDQI] in { - def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)), - (VANDPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)), - (VORPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)), - (VXORPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)), - (VANDNPSrr VR128:$src1, VR128:$src2)>; - - def : Pat<(X86fand VR128:$src1, (loadv4f32 addr:$src2)), - (VANDPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86for VR128:$src1, (loadv4f32 addr:$src2)), - (VORPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fxor VR128:$src1, (loadv4f32 addr:$src2)), - (VXORPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fandn VR128:$src1, (loadv4f32 addr:$src2)), - (VANDNPSrm VR128:$src1, addr:$src2)>; - - def : Pat<(v2f64 (X86fand VR128:$src1, VR128:$src2)), - (VANDPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86for VR128:$src1, VR128:$src2)), - (VORPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86fxor VR128:$src1, VR128:$src2)), - (VXORPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86fandn VR128:$src1, VR128:$src2)), - (VANDNPDrr VR128:$src1, VR128:$src2)>; - - def : Pat<(X86fand VR128:$src1, (loadv2f64 addr:$src2)), - (VANDPDrm VR128:$src1, addr:$src2)>; - def : Pat<(X86for VR128:$src1, (loadv2f64 addr:$src2)), - (VORPDrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fxor VR128:$src1, (loadv2f64 addr:$src2)), - (VXORPDrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fandn VR128:$src1, (loadv2f64 addr:$src2)), - (VANDNPDrm VR128:$src1, addr:$src2)>; - - def : Pat<(v8f32 (X86fand VR256:$src1, VR256:$src2)), - (VANDPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8f32 (X86for VR256:$src1, VR256:$src2)), - (VORPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8f32 (X86fxor VR256:$src1, VR256:$src2)), - (VXORPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8f32 (X86fandn VR256:$src1, VR256:$src2)), - (VANDNPSYrr VR256:$src1, VR256:$src2)>; - - def : Pat<(X86fand VR256:$src1, (loadv8f32 addr:$src2)), - (VANDPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(X86for VR256:$src1, (loadv8f32 addr:$src2)), - (VORPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(X86fxor VR256:$src1, (loadv8f32 addr:$src2)), - (VXORPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(X86fandn VR256:$src1, (loadv8f32 addr:$src2)), - (VANDNPSYrm VR256:$src1, addr:$src2)>; - - def : Pat<(v4f64 (X86fand VR256:$src1, VR256:$src2)), - (VANDPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86for VR256:$src1, VR256:$src2)), - (VORPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86fxor VR256:$src1, VR256:$src2)), - (VXORPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86fandn VR256:$src1, VR256:$src2)), - (VANDNPDYrr VR256:$src1, VR256:$src2)>; - - def : Pat<(X86fand VR256:$src1, (loadv4f64 addr:$src2)), - (VANDPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(X86for VR256:$src1, (loadv4f64 addr:$src2)), - (VORPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(X86fxor VR256:$src1, (loadv4f64 addr:$src2)), - (VXORPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(X86fandn VR256:$src1, (loadv4f64 addr:$src2)), - (VANDNPDYrm VR256:$src1, addr:$src2)>; -} - -let Predicates = [HasAVX, NoVLX_Or_NoDQI] in { // Use packed logical operations for scalar ops. def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)), (COPY_TO_REGCLASS (VANDPDrr @@ -2970,24 +2896,6 @@ let Predicates = [HasAVX, NoVLX_Or_NoDQI] in { } let Predicates = [UseSSE1] in { - def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)), - (ANDPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)), - (ORPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)), - (XORPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)), - (ANDNPSrr VR128:$src1, VR128:$src2)>; - - def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)), - (ANDPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)), - (ORPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)), - (XORPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)), - (ANDNPSrm VR128:$src1, addr:$src2)>; - // Use packed logical operations for scalar ops. def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)), (COPY_TO_REGCLASS (ANDPSrr @@ -3008,24 +2916,6 @@ let Predicates = [UseSSE1] in { } let Predicates = [UseSSE2] in { - def : Pat<(v2f64 (X86fand VR128:$src1, VR128:$src2)), - (ANDPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86for VR128:$src1, VR128:$src2)), - (ORPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86fxor VR128:$src1, VR128:$src2)), - (XORPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86fandn VR128:$src1, VR128:$src2)), - (ANDNPDrr VR128:$src1, VR128:$src2)>; - - def : Pat<(X86fand VR128:$src1, (memopv2f64 addr:$src2)), - (ANDPDrm VR128:$src1, addr:$src2)>; - def : Pat<(X86for VR128:$src1, (memopv2f64 addr:$src2)), - (ORPDrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fxor VR128:$src1, (memopv2f64 addr:$src2)), - (XORPDrm VR128:$src1, addr:$src2)>; - def : Pat<(X86fandn VR128:$src1, (memopv2f64 addr:$src2)), - (ANDNPDrm VR128:$src1, addr:$src2)>; - // Use packed logical operations for scalar ops. def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)), (COPY_TO_REGCLASS (ANDPDrr @@ -3045,6 +2935,25 @@ let Predicates = [UseSSE2] in { (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>; } +// Patterns for packed operations when we don't have integer type available. +def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)), + (ANDPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)), + (ORPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)), + (XORPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)), + (ANDNPSrr VR128:$src1, VR128:$src2)>; + +def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)), + (ANDPSrm VR128:$src1, addr:$src2)>; +def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)), + (ORPSrm VR128:$src1, addr:$src2)>; +def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)), + (XORPSrm VR128:$src1, addr:$src2)>; +def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)), + (ANDNPSrm VR128:$src1, addr:$src2)>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Arithmetic Instructions //===----------------------------------------------------------------------===// -- 2.7.4