From 9fcd212e2f678fdbdf304399a1e58ca490dc54d1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 28 Feb 2020 15:42:47 -0800 Subject: [PATCH] [X86] Remove isel patterns from broadcast of loadi32. We already combine non-extending loads with broadcasts in DAG combine. All that these patterns are picking up is the aligned extload special case. But the only lit test we have that exercises it is using a v8i1 load that the datalayout is reporting as align 8. That seems generous. So without a realistic test case I don't think there is much value in these patterns. --- llvm/lib/Target/X86/X86InstrAVX512.td | 10 ---------- llvm/lib/Target/X86/X86InstrSSE.td | 6 ------ llvm/test/CodeGen/X86/vector-sext.ll | 3 ++- 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index a2bd6a2853a0..1d3ef67c9d3d 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1427,10 +1427,6 @@ let Predicates = [HasAVX512] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQZrm addr:$src)>; - - // FIXME this is to handle aligned extloads from i8. - def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))), - (VPBROADCASTDZrm addr:$src)>; } let Predicates = [HasVLX] in { @@ -1439,12 +1435,6 @@ let Predicates = [HasVLX] in { (VPBROADCASTQZ128rm addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQZ256rm addr:$src)>; - - // FIXME this is to handle aligned extloads from i8. - def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), - (VPBROADCASTDZ128rm addr:$src)>; - def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), - (VPBROADCASTDZ256rm addr:$src)>; } let Predicates = [HasVLX, HasBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. 
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index e66f15747787..73bba723ab96 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7529,12 +7529,6 @@ let Predicates = [HasAVX2, NoVLX] in { (VPBROADCASTQrm addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQYrm addr:$src)>; - - // FIXME this is to handle aligned extloads from i8/i16. - def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), - (VPBROADCASTDrm addr:$src)>; - def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), - (VPBROADCASTDYrm addr:$src)>; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll index 44ba29d978e2..0b35db5cadb2 100644 --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ b/llvm/test/CodeGen/X86/vector-sext.ll @@ -2259,7 +2259,8 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { ; ; AVX2-LABEL: load_sext_8i1_to_8i32: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vpbroadcastd (%rdi), %ymm0 +; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128] ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 -- 2.34.1