[X86] X86FixupVectorConstantsPass - attempt to replace full width fp vector constant...
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 27 May 2023 16:59:19 +0000 (17:59 +0100)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Mon, 29 May 2023 15:10:52 +0000 (16:10 +0100)
commit 98061013e01207444cfd3980cde17b5e75764fbe
tree a1ca3d3e6baa3611eb9d3cf9d507dfa3225e6777
parent 7fb60b0123e50389afbde0286a0e59923d154210
[X86] X86FixupVectorConstantsPass - attempt to replace full width fp vector constant loads with broadcasts on AVX+ targets

lowerBuildVectorAsBroadcast does not broadcast splat constants in all cases. As a result, there are many situations where a full-width vector load that has failed to fold, but is loading splat constant values, could use a broadcast load instruction just as cheaply while also saving constant pool space.

NOTE: SSE3 targets can use MOVDDUP, but not all SSE-era CPUs can perform this as cheaply as a vector load; we will need to add scheduler model checks if we want to pursue this.
81 files changed:
llvm/lib/Target/X86/X86FixupVectorConstants.cpp
llvm/test/CodeGen/X86/avx-basic.ll
llvm/test/CodeGen/X86/avx-vbroadcast.ll
llvm/test/CodeGen/X86/avx2-conversions.ll
llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
llvm/test/CodeGen/X86/avx2-vbroadcast.ll
llvm/test/CodeGen/X86/avx512-regcall-Mask.ll
llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
llvm/test/CodeGen/X86/bitreverse.ll
llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
llvm/test/CodeGen/X86/cast-vsel.ll
llvm/test/CodeGen/X86/combine-and.ll
llvm/test/CodeGen/X86/combine-sdiv.ll
llvm/test/CodeGen/X86/combine-udiv.ll
llvm/test/CodeGen/X86/extractelement-load.ll
llvm/test/CodeGen/X86/fma-fneg-combine-2.ll
llvm/test/CodeGen/X86/fma-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/fma_patterns.ll
llvm/test/CodeGen/X86/fma_patterns_wide.ll
llvm/test/CodeGen/X86/fminimum-fmaximum.ll
llvm/test/CodeGen/X86/fold-vector-sext-zext.ll
llvm/test/CodeGen/X86/fold-vector-trunc-sitofp.ll
llvm/test/CodeGen/X86/fp-round.ll
llvm/test/CodeGen/X86/insert-into-constant-vector.ll
llvm/test/CodeGen/X86/known-bits-vector.ll
llvm/test/CodeGen/X86/masked_store_trunc.ll
llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
llvm/test/CodeGen/X86/memset-nonzero.ll
llvm/test/CodeGen/X86/merge-store-constants.ll
llvm/test/CodeGen/X86/oddshuffles.ll
llvm/test/CodeGen/X86/paddus.ll
llvm/test/CodeGen/X86/pr30290.ll
llvm/test/CodeGen/X86/pr32368.ll
llvm/test/CodeGen/X86/pr38639.ll
llvm/test/CodeGen/X86/psubus.ll
llvm/test/CodeGen/X86/recip-fastmath.ll
llvm/test/CodeGen/X86/recip-fastmath2.ll
llvm/test/CodeGen/X86/sadd_sat_vec.ll
llvm/test/CodeGen/X86/sat-add.ll
llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
llvm/test/CodeGen/X86/splat-const.ll
llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
llvm/test/CodeGen/X86/sqrt-fastmath.ll
llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
llvm/test/CodeGen/X86/sse2.ll
llvm/test/CodeGen/X86/sshl_sat_vec.ll
llvm/test/CodeGen/X86/ssub_sat_vec.ll
llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll
llvm/test/CodeGen/X86/v8i1-masks.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
llvm/test/CodeGen/X86/vec_anyext.ll
llvm/test/CodeGen/X86/vec_fabs.ll
llvm/test/CodeGen/X86/vec_fp_to_int.ll
llvm/test/CodeGen/X86/vec_int_to_fp.ll
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
llvm/test/CodeGen/X86/vector-fshl-256.ll
llvm/test/CodeGen/X86/vector-fshr-256.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-4.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll
llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
llvm/test/CodeGen/X86/vector-shuffle-combining.ll
llvm/test/CodeGen/X86/vector-trunc-math.ll
llvm/test/CodeGen/X86/vector-trunc-ssat.ll
llvm/test/CodeGen/X86/vector-trunc-usat.ll
llvm/test/CodeGen/X86/vector-trunc.ll
llvm/test/CodeGen/X86/vselect-avx.ll
llvm/test/CodeGen/X86/vselect-zero.ll
llvm/test/CodeGen/X86/win_cst_pool.ll