[X86] X86FixupVectorConstantsPass - attempt to replace full width integer vector...
author: Simon Pilgrim <llvm-dev@redking.me.uk>
Wed, 14 Jun 2023 11:25:59 +0000 (12:25 +0100)
committer: Simon Pilgrim <llvm-dev@redking.me.uk>
Wed, 14 Jun 2023 11:48:33 +0000 (12:48 +0100)
commit: f6ff2cc7e0ae4fd9b14583a998ddeada256a954f
tree: 01bcb070ff864b2437a1e5f7f50f4d183052f8a5
parent: ffd7a200fdfbd01ef296101647d2f2da91ddfd41
[X86] X86FixupVectorConstantsPass - attempt to replace full width integer vector constant loads with broadcasts on AVX2+ targets (REAPPLIED)

lowerBuildVectorAsBroadcast does not broadcast splat constants in all cases. As a result, there are many situations where a full-width vector load that failed to fold, but is loading splat constant values, could use a broadcast load instruction just as cheaply and save constant pool space.

This is an updated version of commit ab4b924832ce26c21b88d7f82fcf4992ea8906bb, which was reverted in 78de45fd4a902066617fcc9bb88efee11f743bc6.
212 files changed:
llvm/lib/Target/X86/X86FixupVectorConstants.cpp
llvm/test/CodeGen/X86/abdu-vector-128.ll
llvm/test/CodeGen/X86/abdu-vector-256.ll
llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
llvm/test/CodeGen/X86/avx-logic.ll
llvm/test/CodeGen/X86/avx-shift.ll
llvm/test/CodeGen/X86/avx2-arith.ll
llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
llvm/test/CodeGen/X86/avx2-shift.ll
llvm/test/CodeGen/X86/avx2-vector-shifts.ll
llvm/test/CodeGen/X86/avx512-arith.ll
llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
llvm/test/CodeGen/X86/bitcast-vector-bool.ll
llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
llvm/test/CodeGen/X86/combine-add.ll
llvm/test/CodeGen/X86/combine-bitreverse.ll
llvm/test/CodeGen/X86/combine-bitselect.ll
llvm/test/CodeGen/X86/combine-pavg.ll
llvm/test/CodeGen/X86/combine-pmuldq.ll
llvm/test/CodeGen/X86/combine-sdiv.ll
llvm/test/CodeGen/X86/combine-shl.ll
llvm/test/CodeGen/X86/combine-smax.ll
llvm/test/CodeGen/X86/combine-smin.ll
llvm/test/CodeGen/X86/combine-sra.ll
llvm/test/CodeGen/X86/combine-srl.ll
llvm/test/CodeGen/X86/combine-sub-usat.ll
llvm/test/CodeGen/X86/combine-udiv.ll
llvm/test/CodeGen/X86/combine-urem.ll
llvm/test/CodeGen/X86/concat-cast.ll
llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
llvm/test/CodeGen/X86/dpbusd_i4.ll
llvm/test/CodeGen/X86/freeze-vector.ll
llvm/test/CodeGen/X86/gfni-funnel-shifts.ll
llvm/test/CodeGen/X86/gfni-rotates.ll
llvm/test/CodeGen/X86/gfni-shifts.ll
llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
llvm/test/CodeGen/X86/i64-to-float.ll
llvm/test/CodeGen/X86/icmp-pow2-diff.ll
llvm/test/CodeGen/X86/insert-into-constant-vector.ll
llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
llvm/test/CodeGen/X86/masked_store_trunc.ll
llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
llvm/test/CodeGen/X86/midpoint-int-vec-512.ll
llvm/test/CodeGen/X86/min-legal-vector-width.ll
llvm/test/CodeGen/X86/movmsk-cmp.ll
llvm/test/CodeGen/X86/oddshuffles.ll
llvm/test/CodeGen/X86/paddus.ll
llvm/test/CodeGen/X86/pmaddubsw.ll
llvm/test/CodeGen/X86/pmul.ll
llvm/test/CodeGen/X86/pmulh.ll
llvm/test/CodeGen/X86/pr31773.ll
llvm/test/CodeGen/X86/pr37499.ll
llvm/test/CodeGen/X86/pr63108.ll
llvm/test/CodeGen/X86/prefer-avx256-lzcnt.ll
llvm/test/CodeGen/X86/prefer-avx256-popcnt.ll
llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll
llvm/test/CodeGen/X86/psubus.ll
llvm/test/CodeGen/X86/sadd_sat_vec.ll
llvm/test/CodeGen/X86/sat-add.ll
llvm/test/CodeGen/X86/setcc-non-simple-type.ll
llvm/test/CodeGen/X86/shrink_vmul.ll
llvm/test/CodeGen/X86/shuffle-strided-with-offset-256.ll
llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
llvm/test/CodeGen/X86/slow-pmulld.ll
llvm/test/CodeGen/X86/splat-for-size.ll
llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll
llvm/test/CodeGen/X86/sshl_sat_vec.ll
llvm/test/CodeGen/X86/ssub_sat_vec.ll
llvm/test/CodeGen/X86/uadd_sat_vec.ll
llvm/test/CodeGen/X86/umax.ll
llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
llvm/test/CodeGen/X86/urem-seteq-vec-splat.ll
llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
llvm/test/CodeGen/X86/usub_sat_vec.ll
llvm/test/CodeGen/X86/var-permute-256.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
llvm/test/CodeGen/X86/vec_anyext.ll
llvm/test/CodeGen/X86/vec_cast3.ll
llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
llvm/test/CodeGen/X86/vec_int_to_fp.ll
llvm/test/CodeGen/X86/vec_minmax_uint.ll
llvm/test/CodeGen/X86/vec_smulo.ll
llvm/test/CodeGen/X86/vec_uaddo.ll
llvm/test/CodeGen/X86/vec_umulo.ll
llvm/test/CodeGen/X86/vec_usubo.ll
llvm/test/CodeGen/X86/vector-bitreverse.ll
llvm/test/CodeGen/X86/vector-blend.ll
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
llvm/test/CodeGen/X86/vector-fshl-128.ll
llvm/test/CodeGen/X86/vector-fshl-256.ll
llvm/test/CodeGen/X86/vector-fshl-512.ll
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll
llvm/test/CodeGen/X86/vector-fshr-128.ll
llvm/test/CodeGen/X86/vector-fshr-256.ll
llvm/test/CodeGen/X86/vector-fshr-512.ll
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll
llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
llvm/test/CodeGen/X86/vector-idiv.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-5.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-8.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-4.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-8.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-2.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-5.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-8.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-8.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll
llvm/test/CodeGen/X86/vector-lzcnt-256.ll
llvm/test/CodeGen/X86/vector-lzcnt-512.ll
llvm/test/CodeGen/X86/vector-mul.ll
llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll
llvm/test/CodeGen/X86/vector-popcnt-128.ll
llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll
llvm/test/CodeGen/X86/vector-popcnt-256.ll
llvm/test/CodeGen/X86/vector-popcnt-512-ult-ugt.ll
llvm/test/CodeGen/X86/vector-popcnt-512.ll
llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
llvm/test/CodeGen/X86/vector-reduce-umax.ll
llvm/test/CodeGen/X86/vector-reduce-umin.ll
llvm/test/CodeGen/X86/vector-replicaton-i1-mask.ll
llvm/test/CodeGen/X86/vector-rotate-128.ll
llvm/test/CodeGen/X86/vector-rotate-256.ll
llvm/test/CodeGen/X86/vector-rotate-512.ll
llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
llvm/test/CodeGen/X86/vector-shift-shl-128.ll
llvm/test/CodeGen/X86/vector-shift-shl-256.ll
llvm/test/CodeGen/X86/vector-shift-shl-512.ll
llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
llvm/test/CodeGen/X86/vector-shuffle-combining.ll
llvm/test/CodeGen/X86/vector-shuffle-v192.ll
llvm/test/CodeGen/X86/vector-shuffle-v48.ll
llvm/test/CodeGen/X86/vector-trunc-math.ll
llvm/test/CodeGen/X86/vector-trunc-packus.ll
llvm/test/CodeGen/X86/vector-trunc-ssat.ll
llvm/test/CodeGen/X86/vector-trunc-usat.ll
llvm/test/CodeGen/X86/vector-trunc.ll
llvm/test/CodeGen/X86/vector-tzcnt-128.ll
llvm/test/CodeGen/X86/vector-tzcnt-256.ll
llvm/test/CodeGen/X86/vector-tzcnt-512.ll
llvm/test/CodeGen/X86/vector-unsigned-cmp.ll
llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll
llvm/test/CodeGen/X86/vselect-avx.ll
llvm/test/CodeGen/X86/vselect-minmax.ll
llvm/test/CodeGen/X86/vselect-pcmp.ll
llvm/test/CodeGen/X86/vselect-post-combine.ll
llvm/test/CodeGen/X86/x86-interleaved-access.ll
llvm/test/CodeGen/X86/zero_extend_vector_inreg.ll
llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll