From 3f722d40c55c9cffd52706570d096b9012b56a0b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 1 Jul 2019 21:25:11 +0000 Subject: [PATCH] [X86] Use v4i32 vzloads instead of v2i64 for vpmovzx/vpmovsx patterns where only 32-bits are loaded. v2i64 vzload defines a 64-bit memory access. It doesn't look like we have any coverage for this either way. Also remove some vzload usages where the instruction loads only 16-bits. llvm-svn: 364851 --- llvm/lib/Target/X86/X86InstrAVX512.td | 8 +++----- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 2 ++ llvm/lib/Target/X86/X86InstrSSE.td | 6 ++---- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index f9390c8..1ec34e3 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -9738,15 +9738,13 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#BDZ128rm) addr:$src)>; - def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), + def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))), (!cast(OpcPrefix#BDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (loadv16i8 addr:$src))), (!cast(OpcPrefix#BDZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))), (!cast(OpcPrefix#BQZ128rm) addr:$src)>; - def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), - (!cast(OpcPrefix#BQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (loadv16i8 addr:$src))), (!cast(OpcPrefix#BQZ128rm) addr:$src)>; @@ -9761,7 +9759,7 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#WQZ128rm) addr:$src)>; - def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), + def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v4i32 addr:$src)))), (!cast(OpcPrefix#WQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (loadv8i16 addr:$src))), (!cast(OpcPrefix#WQZ128rm) addr:$src)>; @@ -9785,7 +9783,7 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#BQZ256rm) addr:$src)>; - def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), + def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))), (!cast(OpcPrefix#BQZ256rm) addr:$src)>; def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))), (!cast(OpcPrefix#BQZ256rm) addr:$src)>; diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 814dfa1..4077bdc 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -939,6 +939,8 @@ def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>; def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>; def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>; +def vzload_v4i32 : PatFrag<(ops node:$src), + (bitconvert (v4i32 (X86vzload node:$src)))>; def vzload_v2i64 : PatFrag<(ops node:$src), (bitconvert (v2i64 (X86vzload node:$src)))>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index a027f2b..1331618 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5031,15 +5031,13 @@ multiclass SS41I_pmovx_patterns(OpcPrefix#BDrm) addr:$src)>; - def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), + def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v4i32 addr:$src)))), (!cast(OpcPrefix#BDrm) addr:$src)>; def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))), (!cast(OpcPrefix#BDrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))), (!cast(OpcPrefix#BQrm) addr:$src)>; - def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), - (!cast(OpcPrefix#BQrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))), (!cast(OpcPrefix#BQrm) addr:$src)>; @@ -5054,7 +5052,7 @@ multiclass SS41I_pmovx_patterns(OpcPrefix#WQrm) addr:$src)>; - def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))), + def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v4i32 addr:$src)))), (!cast(OpcPrefix#WQrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))), (!cast(OpcPrefix#WQrm) addr:$src)>; -- 2.7.4