From 1a872f2b15157dc8a85aac85167d08b73cef1e76 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 10 Mar 2019 05:21:52 +0000 Subject: [PATCH] Recommit r355224 "[TableGen][SelectionDAG][X86] Add specific isel matchers for immAllZerosV/immAllOnesV. Remove bitcasts from X86 patterns that are no longer necessary." Includes a fix to emit a CheckOpcode for build_vector when immAllZerosV/immAllOnesV is used as a pattern root. This means it can't be used to look through bitcasts when used as a root, but that's probably ok. This extra CheckOpcode will ensure that the first match in the isel table will be a SwitchOpcode which is needed by the caching optimization in the ISel Matcher. Original commit message: Previously we had build_vector PatFrags that called ISD::isBuildVectorAllZeros/Ones. Internally the ISD::isBuildVectorAllZeros/Ones look through bitcasts, but we aren't able to take advantage of that in isel. Instead of we have to canonicalize the types of the all zeros/ones build_vectors and insert bitcasts. Then we have to pattern match those exact bitcasts. By emitting specific matchers for these 2 nodes, we can make isel look through any bitcasts without needing to explicitly match them. We should also be able to remove the canonicalization to vXi32 from lowering, but I've left that for a follow up. This removes something like 40,000 bytes from the X86 isel table. 
Differential Revision: https://reviews.llvm.org/D58595 llvm-svn: 355784 --- llvm/include/llvm/CodeGen/SelectionDAGISel.h | 2 + llvm/include/llvm/Target/TargetSelectionDAG.td | 13 ++-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 6 ++ llvm/lib/Target/SystemZ/SystemZOperators.td | 20 ++--- llvm/lib/Target/X86/X86InstrAVX512.td | 88 ++++++++++------------ llvm/lib/Target/X86/X86InstrSSE.td | 2 +- llvm/lib/Target/X86/X86InstrVecCompiler.td | 2 +- llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 16 +++- llvm/utils/TableGen/DAGISelMatcher.cpp | 10 +++ llvm/utils/TableGen/DAGISelMatcher.h | 34 +++++++++ llvm/utils/TableGen/DAGISelMatcherEmitter.cpp | 10 +++ llvm/utils/TableGen/DAGISelMatcherGen.cpp | 21 ++++++ 12 files changed, 156 insertions(+), 68 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index 10e96e8..36bc6c0 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -147,6 +147,8 @@ public: OPC_CheckValueType, OPC_CheckComplexPat, OPC_CheckAndImm, OPC_CheckOrImm, + OPC_CheckImmAllOnesV, + OPC_CheckImmAllZerosV, OPC_CheckFoldableChainNode, OPC_EmitInteger, diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 174d97a..64b0786 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -782,14 +782,11 @@ class FPImmLeaf def vtInt : PatLeaf<(vt), [{ return N->getVT().isInteger(); }]>; def vtFP : PatLeaf<(vt), [{ return N->getVT().isFloatingPoint(); }]>; -def immAllOnesV: PatLeaf<(build_vector), [{ - return ISD::isBuildVectorAllOnes(N); -}]>; -def immAllZerosV: PatLeaf<(build_vector), [{ - return ISD::isBuildVectorAllZeros(N); -}]>; - - +// Use ISD::isBuildVectorAllOnes or ISD::isBuildVectorAllZeros to look for +// the corresponding build_vector. Will look through bitcasts except when used +// as a pattern root. 
+def immAllOnesV; // ISD::isBuildVectorAllOnes +def immAllZerosV; // ISD::isBuildVectorAllZeros // Other helper fragments. def not : PatFrag<(ops node:$in), (xor node:$in, -1)>; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index ac21a18..2b3a697 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -3393,6 +3393,12 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case OPC_CheckOrImm: if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break; continue; + case OPC_CheckImmAllOnesV: + if (!ISD::isBuildVectorAllOnes(N.getNode())) break; + continue; + case OPC_CheckImmAllZerosV: + if (!ISD::isBuildVectorAllZeros(N.getNode())) break; + continue; case OPC_CheckFoldableChainNode: { assert(NodeStack.size() != 1 && "No parent node"); diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index 876a809..032d08d 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -735,13 +735,13 @@ def z_vlef64 : z_vle; // zeroed vector. class z_vllez : PatFrag<(ops node:$addr), - (z_vector_insert (immAllZerosV), + (z_vector_insert immAllZerosV, (scalartype (load node:$addr)), (i32 index))>; def z_vllezi8 : z_vllez; def z_vllezi16 : z_vllez; def z_vllezi32 : z_vllez; def z_vllezi64 : PatFrags<(ops node:$addr), - [(z_vector_insert (immAllZerosV), + [(z_vector_insert immAllZerosV, (i64 (load node:$addr)), (i32 0)), (z_join_dwords (i64 (load node:$addr)), (i64 0))]>; // We use high merges to form a v4f32 from four f32s. 
Propagating zero @@ -755,11 +755,11 @@ def z_vllezf32 : PatFrag<(ops node:$addr), (v4f32 (scalar_to_vector (f32 (load node:$addr)))))))), (v2i64 - (bitconvert (v4f32 (immAllZerosV)))))>; + (bitconvert (v4f32 immAllZerosV))))>; def z_vllezf64 : PatFrag<(ops node:$addr), (z_merge_high (v2f64 (scalar_to_vector (f64 (load node:$addr)))), - (immAllZerosV))>; + immAllZerosV)>; // Similarly for the high element of a zeroed vector. def z_vllezli32 : z_vllez; @@ -770,9 +770,9 @@ def z_vllezlf32 : PatFrag<(ops node:$addr), (z_merge_high (v4f32 (scalar_to_vector (f32 (load node:$addr)))), - (v4f32 (immAllZerosV))))), + (v4f32 immAllZerosV)))), (v2i64 - (bitconvert (v4f32 (immAllZerosV)))))>; + (bitconvert (v4f32 immAllZerosV))))>; // Store one element of a vector. class z_vste @@ -787,16 +787,16 @@ def z_vstef32 : z_vste; def z_vstef64 : z_vste; // Arithmetic negation on vectors. -def z_vneg : PatFrag<(ops node:$x), (sub (immAllZerosV), node:$x)>; +def z_vneg : PatFrag<(ops node:$x), (sub immAllZerosV, node:$x)>; // Bitwise negation on vectors. -def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (immAllOnesV))>; +def z_vnot : PatFrag<(ops node:$x), (xor node:$x, immAllOnesV)>; // Signed "integer greater than zero" on vectors. -def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (immAllZerosV))>; +def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, immAllZerosV)>; // Signed "integer less than zero" on vectors. -def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (immAllZerosV), node:$x)>; +def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph immAllZerosV, node:$x)>; // Integer absolute on vectors. 
class z_viabs diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 6e25f77..995c9db 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -94,10 +94,7 @@ class X86VectorVTInfo("v" # !srl(Size, 5) # "i32"); - dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV))); + dag ImmAllZerosV = (VT immAllZerosV); string ZSuffix = !if (!eq (Size, 128), "Z128", !if (!eq (Size, 256), "Z256", "Z")); @@ -450,8 +447,8 @@ def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst), def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst), (ins VK8WM:$mask), "", [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask), - (bc_v8i64 (v16i32 immAllOnesV)), - (bc_v8i64 (v16i32 immAllZerosV))))]>; + (v8i64 immAllOnesV), + (v8i64 immAllZerosV)))]>; } let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, @@ -1464,7 +1461,7 @@ def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))), // Patterns for selects of bitcasted operations. 
def : Pat<(vselect VK16WM:$mask, (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), - (bc_v16f32 (v16i32 immAllZerosV))), + (v16f32 immAllZerosV)), (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect VK16WM:$mask, (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), @@ -1481,7 +1478,7 @@ def : Pat<(vselect VK16WM:$mask, def : Pat<(vselect VK8WM:$mask, (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), - (bc_v8f64 (v16i32 immAllZerosV))), + (v8f64 immAllZerosV)), (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))), @@ -1489,7 +1486,7 @@ def : Pat<(vselect VK8WM:$mask, (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))), - (bc_v8i64 (v16i32 immAllZerosV))), + (v8i64 immAllZerosV)), (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))), @@ -1517,7 +1514,7 @@ def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))), // Patterns for selects of bitcasted operations. def : Pat<(vselect VK8WM:$mask, (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), - (bc_v8f32 (v8i32 immAllZerosV))), + (v8f32 immAllZerosV)), (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))), @@ -1566,7 +1563,7 @@ defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2" // Patterns for selects of bitcasted operations. 
def : Pat<(vselect VK4WM:$mask, (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), - (bc_v4f64 (v8i32 immAllZerosV))), + (v4f64 immAllZerosV)), (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>; def : Pat<(vselect VK4WM:$mask, (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), @@ -1574,7 +1571,7 @@ def : Pat<(vselect VK4WM:$mask, (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; def : Pat<(vselect VK4WM:$mask, (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))), - (bc_v4i64 (v8i32 immAllZerosV))), + (v4i64 immAllZerosV)), (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>; def : Pat<(vselect VK4WM:$mask, (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))), @@ -1599,7 +1596,7 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8", // Patterns for selects of bitcasted operations. def : Pat<(vselect VK16WM:$mask, (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))), - (bc_v16f32 (v16i32 immAllZerosV))), + (v16f32 immAllZerosV)), (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>; def : Pat<(vselect VK16WM:$mask, (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))), @@ -1616,7 +1613,7 @@ def : Pat<(vselect VK16WM:$mask, def : Pat<(vselect VK8WM:$mask, (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), - (bc_v8f64 (v16i32 immAllZerosV))), + (v8f64 immAllZerosV)), (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))), @@ -1624,7 +1621,7 @@ def : Pat<(vselect VK8WM:$mask, (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))), - (bc_v8i64 (v16i32 immAllZerosV))), + (v8i64 immAllZerosV)), (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>; def : Pat<(vselect VK8WM:$mask, (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))), @@ -3609,7 +3606,7 @@ def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), 
(ins f256mem:$dst, VR256X:$src), "", []>, Sched<[WriteFStoreY]>; } -def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)), +def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV), (v8i64 VR512:$src))), (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), VK8), VR512:$src)>; @@ -3621,7 +3618,7 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), // These patterns exist to prevent the above patterns from introducing a second // mask inversion when one already exists. def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)), - (bc_v8i64 (v16i32 immAllZerosV)), + (v8i64 immAllZerosV), (v8i64 VR512:$src))), (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>; def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)), @@ -4119,8 +4116,7 @@ multiclass avx512_load_scalar_lowering(InstrStr#rmkz) (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), @@ -4145,8 +4141,7 @@ multiclass avx512_load_scalar_lowering_subreg(InstrStr#rmkz) (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), @@ -4175,8 +4170,7 @@ multiclass avx512_load_scalar_lowering_subreg2(InstrStr#rmkz) (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), @@ -4194,7 +4188,7 @@ def : Pat<(_.info128.VT (extract_subvector // AVX512Vl patterns. 
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128, - (_.info128.VT (bitconvert (v4i32 immAllZerosV))))), + _.info128.ImmAllZerosV)), (!cast(InstrStr#rmkz) (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), addr:$srcAddr)>; @@ -11578,21 +11572,21 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), - (bitconvert (v4i32 immAllZerosV))), + immAllZerosV), (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), (v2f64 VR128X:$src0)), (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), - (bitconvert (v4i32 immAllZerosV))), + immAllZerosV), (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))), (v2f64 VR128X:$src0)), (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))), - (bitconvert (v4i32 immAllZerosV))), + immAllZerosV), (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; } @@ -12091,39 +12085,39 @@ defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU, // TODO: We should maybe have a more generalized algorithm for folding to // vpternlog. 
let Predicates = [HasAVX512] in { - def : Pat<(xor VR512:$src, (bc_v64i8 (v16i32 immAllOnesV))), + def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)), (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; - def : Pat<(xor VR512:$src, (bc_v32i16 (v16i32 immAllOnesV))), + def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)), (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; - def : Pat<(xor VR512:$src, (bc_v16i32 (v16i32 immAllOnesV))), + def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)), (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; - def : Pat<(xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV))), + def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)), (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; } let Predicates = [HasAVX512, NoVLX] in { - def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))), + def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)), (EXTRACT_SUBREG (VPTERNLOGQZrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (i8 15)), sub_xmm)>; - def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))), + def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)), (EXTRACT_SUBREG (VPTERNLOGQZrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (i8 15)), sub_xmm)>; - def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))), + def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)), (EXTRACT_SUBREG (VPTERNLOGQZrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (i8 15)), sub_xmm)>; - def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))), + def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)), (EXTRACT_SUBREG (VPTERNLOGQZrri 
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), @@ -12131,28 +12125,28 @@ let Predicates = [HasAVX512, NoVLX] in { (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), (i8 15)), sub_xmm)>; - def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))), + def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)), (EXTRACT_SUBREG (VPTERNLOGQZrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (i8 15)), sub_ymm)>; - def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))), + def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)), (EXTRACT_SUBREG (VPTERNLOGQZrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (i8 15)), sub_ymm)>; - def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))), + def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)), (EXTRACT_SUBREG (VPTERNLOGQZrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), (i8 15)), sub_ymm)>; - def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))), + def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)), (EXTRACT_SUBREG (VPTERNLOGQZrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), @@ -12162,22 +12156,22 @@ let Predicates = [HasAVX512, NoVLX] in { } let Predicates = [HasVLX] in { - def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))), + def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)), (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; - def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))), + def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)), (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; - def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 
immAllOnesV))), + def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)), (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; - def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))), + def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)), (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; - def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))), + def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)), (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; - def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))), + def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)), (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; - def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))), + def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)), (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; - def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))), + def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)), (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; } diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 778ab3c..f7a20aa2 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8290,7 +8290,7 @@ multiclass maskmov_lowering(InstrStr#"rm") RC:$mask, addr:$ptr)>; def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask), - (VT (bitconvert (ZeroVT immAllZerosV))))), + (VT immAllZerosV))), (!cast(InstrStr#"rm") RC:$mask, addr:$ptr)>; def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask), (VT RC:$src0))), (!cast(BlendStr#"rr") diff --git a/llvm/lib/Target/X86/X86InstrVecCompiler.td b/llvm/lib/Target/X86/X86InstrVecCompiler.td index e820072..7cb0ec0 100644 --- a/llvm/lib/Target/X86/X86InstrVecCompiler.td +++ b/llvm/lib/Target/X86/X86InstrVecCompiler.td @@ -175,7 +175,7 @@ multiclass subvec_zero_lowering { - def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)), + def : Pat<(DstTy (insert_subvector 
immAllZerosV, (SrcTy RC:$src), (iPTR 0))), (SUBREG_TO_REG (i64 0), (SrcTy (!cast("VMOV"#MoveStr#"rr") RC:$src)), SubIdx)>; diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index 19a7874..0b1687d 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -1281,6 +1281,17 @@ std::string TreePredicateFn::getCodeToRunOnSDNode() const { // PatternToMatch implementation // +static bool isImmAllOnesAllZerosMatch(const TreePatternNode *P) { + if (!P->isLeaf()) + return false; + DefInit *DI = dyn_cast(P->getLeafValue()); + if (!DI) + return false; + + Record *R = DI->getDef(); + return R->getName() == "immAllOnesV" || R->getName() == "immAllZerosV"; +} + /// getPatternSize - Return the 'size' of this pattern. We want to match large /// patterns before small ones. This is used to determine the size of a /// pattern. @@ -1320,6 +1331,8 @@ static unsigned getPatternSize(const TreePatternNode *P, Size += 5; // Matches a ConstantSDNode (+3) and a specific value (+2). else if (Child->getComplexPatternInfo(CGP)) Size += getPatternSize(Child, CGP); + else if (isImmAllOnesAllZerosMatch(Child)) + Size += 4; // Matches a build_vector(+3) and a predicate (+1). else if (!Child->getPredicateCalls().empty()) ++Size; } @@ -2126,7 +2139,8 @@ static TypeSetByHwMode getImplicitType(Record *R, unsigned ResNo, } if (R->getName() == "node" || R->getName() == "srcvalue" || - R->getName() == "zero_reg") { + R->getName() == "zero_reg" || R->getName() == "immAllOnesV" || + R->getName() == "immAllZerosV") { // Placeholder. return TypeSetByHwMode(); // Unknown. 
} diff --git a/llvm/utils/TableGen/DAGISelMatcher.cpp b/llvm/utils/TableGen/DAGISelMatcher.cpp index 92b4c42..0c45ff1 100644 --- a/llvm/utils/TableGen/DAGISelMatcher.cpp +++ b/llvm/utils/TableGen/DAGISelMatcher.cpp @@ -237,6 +237,16 @@ void CheckFoldableChainNodeMatcher::printImpl(raw_ostream &OS, OS.indent(indent) << "CheckFoldableChainNode\n"; } +void CheckImmAllOnesVMatcher::printImpl(raw_ostream &OS, + unsigned indent) const { + OS.indent(indent) << "CheckAllOnesV\n"; +} + +void CheckImmAllZerosVMatcher::printImpl(raw_ostream &OS, + unsigned indent) const { + OS.indent(indent) << "CheckAllZerosV\n"; +} + void EmitIntegerMatcher::printImpl(raw_ostream &OS, unsigned indent) const { OS.indent(indent) << "EmitInteger " << Val << " VT=" << getEnumName(VT) << '\n'; diff --git a/llvm/utils/TableGen/DAGISelMatcher.h b/llvm/utils/TableGen/DAGISelMatcher.h index 9c45a3a..24932e6 100644 --- a/llvm/utils/TableGen/DAGISelMatcher.h +++ b/llvm/utils/TableGen/DAGISelMatcher.h @@ -71,6 +71,8 @@ public: CheckComplexPat, CheckAndImm, CheckOrImm, + CheckImmAllOnesV, + CheckImmAllZerosV, CheckFoldableChainNode, // Node creation/emisssion. @@ -126,6 +128,8 @@ public: case CheckValueType: case CheckAndImm: case CheckOrImm: + case CheckImmAllOnesV: + case CheckImmAllZerosV: case CheckFoldableChainNode: return true; } @@ -753,6 +757,36 @@ private: } }; +/// CheckImmAllOnesVMatcher - This checks if the current node is a build vector +/// of all ones. +class CheckImmAllOnesVMatcher : public Matcher { +public: + CheckImmAllOnesVMatcher() : Matcher(CheckImmAllOnesV) {} + + static bool classof(const Matcher *N) { + return N->getKind() == CheckImmAllOnesV; + } + +private: + void printImpl(raw_ostream &OS, unsigned indent) const override; + bool isEqualImpl(const Matcher *M) const override { return true; } +}; + +/// CheckImmAllZerosVMatcher - This checks if the current node is a build vector +/// of all zeros. 
+class CheckImmAllZerosVMatcher : public Matcher { +public: + CheckImmAllZerosVMatcher() : Matcher(CheckImmAllZerosV) {} + + static bool classof(const Matcher *N) { + return N->getKind() == CheckImmAllZerosV; + } + +private: + void printImpl(raw_ostream &OS, unsigned indent) const override; + bool isEqualImpl(const Matcher *M) const override { return true; } +}; + /// CheckFoldableChainNodeMatcher - This checks to see if the current node /// (which defines a chain operand) is safe to fold into a larger pattern. class CheckFoldableChainNodeMatcher : public Matcher { diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp index 16daeb5..cecbc6c 100644 --- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp +++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp @@ -601,6 +601,14 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx, OS << "OPC_CheckFoldableChainNode,\n"; return 1; + case Matcher::CheckImmAllOnesV: + OS << "OPC_CheckImmAllOnesV,\n"; + return 1; + + case Matcher::CheckImmAllZerosV: + OS << "OPC_CheckImmAllZerosV,\n"; + return 1; + case Matcher::EmitInteger: { int64_t Val = cast(N)->getValue(); OS << "OPC_EmitInteger, " @@ -1007,6 +1015,8 @@ static StringRef getOpcodeString(Matcher::KindTy Kind) { case Matcher::CheckOrImm: return "OPC_CheckOrImm"; break; case Matcher::CheckFoldableChainNode: return "OPC_CheckFoldableChainNode"; break; + case Matcher::CheckImmAllOnesV: return "OPC_CheckImmAllOnesV"; break; + case Matcher::CheckImmAllZerosV: return "OPC_CheckImmAllZerosV"; break; case Matcher::EmitInteger: return "OPC_EmitInteger"; break; case Matcher::EmitStringInteger: return "OPC_EmitStringInteger"; break; case Matcher::EmitRegister: return "OPC_EmitRegister"; break; diff --git a/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/llvm/utils/TableGen/DAGISelMatcherGen.cpp index 1a04905..b2285be 100644 --- a/llvm/utils/TableGen/DAGISelMatcherGen.cpp +++ b/llvm/utils/TableGen/DAGISelMatcherGen.cpp @@ 
-277,6 +277,27 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) { return; } + if (LeafRec->getName() == "immAllOnesV") { + // If this is the root of the dag we're matching, we emit a redundant opcode + // check to ensure that this gets folded into the normal top-level + // OpcodeSwitch. + if (N == Pattern.getSrcPattern()) { + const SDNodeInfo &NI = CGP.getSDNodeInfo(CGP.getSDNodeNamed("build_vector")); + AddMatcher(new CheckOpcodeMatcher(NI)); + } + return AddMatcher(new CheckImmAllOnesVMatcher()); + } + if (LeafRec->getName() == "immAllZerosV") { + // If this is the root of the dag we're matching, we emit a redundant opcode + // check to ensure that this gets folded into the normal top-level + // OpcodeSwitch. + if (N == Pattern.getSrcPattern()) { + const SDNodeInfo &NI = CGP.getSDNodeInfo(CGP.getSDNodeNamed("build_vector")); + AddMatcher(new CheckOpcodeMatcher(NI)); + } + return AddMatcher(new CheckImmAllZerosVMatcher()); + } + errs() << "Unknown leaf kind: " << *N << "\n"; abort(); } -- 2.7.4