[X86][SSE] lowerV2I64Shuffle - use undef elements in PSHUFD mask widening
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 26 Jul 2020 15:03:53 +0000 (16:03 +0100)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 26 Jul 2020 15:04:22 +0000 (16:04 +0100)
commit 17eafe0841d6e523d410771c8d4de99d5881c59d
tree cab27b55b7b9bf0b55202096d62ea17f7c1f38b9
parent d135744c34dc7a6315f1d2d65a969a2791a97534
[X86][SSE] lowerV2I64Shuffle - use undef elements in PSHUFD mask widening

If we lower a v2i64 shuffle to PSHUFD, we currently clamp undef elements to 0 (elements 0,1 of the v4i32), which can result in the shuffle referencing more elements of the source vector than expected, affecting later shuffle combines and KnownBits/SimplifyDemanded calls.

By ensuring we widen the undef mask element, we allow getV4X86ShuffleImm8 to use inline elements as the default, which are more likely to fold.
137 files changed:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avg.ll
llvm/test/CodeGen/X86/avx-cvt.ll
llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/buildvec-extract.ll
llvm/test/CodeGen/X86/cast-vsel.ll
llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
llvm/test/CodeGen/X86/combine-movmsk-avx.ll
llvm/test/CodeGen/X86/combine-sdiv.ll
llvm/test/CodeGen/X86/combine-shl.ll
llvm/test/CodeGen/X86/combine-sra.ll
llvm/test/CodeGen/X86/combine-srl.ll
llvm/test/CodeGen/X86/combine-udiv.ll
llvm/test/CodeGen/X86/combine-urem.ll
llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
llvm/test/CodeGen/X86/extract-store.ll
llvm/test/CodeGen/X86/extractelement-index.ll
llvm/test/CodeGen/X86/extractelement-load.ll
llvm/test/CodeGen/X86/gather-addresses.ll
llvm/test/CodeGen/X86/haddsub-2.ll
llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
llvm/test/CodeGen/X86/horizontal-reduce-add.ll
llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
llvm/test/CodeGen/X86/i128-add.ll
llvm/test/CodeGen/X86/inline-asm-x-i128.ll
llvm/test/CodeGen/X86/known-bits-vector.ll
llvm/test/CodeGen/X86/known-signbits-vector.ll
llvm/test/CodeGen/X86/madd.ll
llvm/test/CodeGen/X86/masked_compressstore.ll
llvm/test/CodeGen/X86/masked_gather.ll
llvm/test/CodeGen/X86/masked_load.ll
llvm/test/CodeGen/X86/masked_store.ll
llvm/test/CodeGen/X86/masked_store_trunc.ll
llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll
llvm/test/CodeGen/X86/min-legal-vector-width.ll
llvm/test/CodeGen/X86/nontemporal-2.ll
llvm/test/CodeGen/X86/oddshuffles.ll
llvm/test/CodeGen/X86/phaddsub-extract.ll
llvm/test/CodeGen/X86/pmul.ll
llvm/test/CodeGen/X86/pmulh.ll
llvm/test/CodeGen/X86/pr15267.ll
llvm/test/CodeGen/X86/pr39733.ll
llvm/test/CodeGen/X86/pr42452.ll
llvm/test/CodeGen/X86/pr42905.ll
llvm/test/CodeGen/X86/pr44976.ll
llvm/test/CodeGen/X86/pr45378.ll
llvm/test/CodeGen/X86/pr46189.ll
llvm/test/CodeGen/X86/pr46455.ll
llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
llvm/test/CodeGen/X86/psubus.ll
llvm/test/CodeGen/X86/sad.ll
llvm/test/CodeGen/X86/sdiv_fix.ll
llvm/test/CodeGen/X86/sdiv_fix_sat.ll
llvm/test/CodeGen/X86/setcc-wide-types.ll
llvm/test/CodeGen/X86/shrink_vmul.ll
llvm/test/CodeGen/X86/slow-pmulld.ll
llvm/test/CodeGen/X86/smul_fix_sat.ll
llvm/test/CodeGen/X86/split-extend-vector-inreg.ll
llvm/test/CodeGen/X86/split-vector-rem.ll
llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/sse41.ll
llvm/test/CodeGen/X86/trunc-subvector.ll
llvm/test/CodeGen/X86/udiv_fix.ll
llvm/test/CodeGen/X86/udiv_fix_sat.ll
llvm/test/CodeGen/X86/uint_to_fp-3.ll
llvm/test/CodeGen/X86/umul_fix_sat.ll
llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
llvm/test/CodeGen/X86/var-permute-128.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
llvm/test/CodeGen/X86/vec_cast2.ll
llvm/test/CodeGen/X86/vec_int_to_fp.ll
llvm/test/CodeGen/X86/vec_saddo.ll
llvm/test/CodeGen/X86/vec_smulo.ll
llvm/test/CodeGen/X86/vec_ssubo.ll
llvm/test/CodeGen/X86/vec_uaddo.ll
llvm/test/CodeGen/X86/vec_umulo.ll
llvm/test/CodeGen/X86/vec_usubo.ll
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
llvm/test/CodeGen/X86/vector-fshl-128.ll
llvm/test/CodeGen/X86/vector-fshl-256.ll
llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
llvm/test/CodeGen/X86/vector-fshr-128.ll
llvm/test/CodeGen/X86/vector-fshr-256.ll
llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
llvm/test/CodeGen/X86/vector-pcmp.ll
llvm/test/CodeGen/X86/vector-reduce-add.ll
llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll
llvm/test/CodeGen/X86/vector-reduce-and.ll
llvm/test/CodeGen/X86/vector-reduce-mul.ll
llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
llvm/test/CodeGen/X86/vector-reduce-or.ll
llvm/test/CodeGen/X86/vector-reduce-smax.ll
llvm/test/CodeGen/X86/vector-reduce-smin.ll
llvm/test/CodeGen/X86/vector-reduce-umax.ll
llvm/test/CodeGen/X86/vector-reduce-umin.ll
llvm/test/CodeGen/X86/vector-reduce-xor.ll
llvm/test/CodeGen/X86/vector-rem.ll
llvm/test/CodeGen/X86/vector-rotate-128.ll
llvm/test/CodeGen/X86/vector-rotate-256.ll
llvm/test/CodeGen/X86/vector-sext.ll
llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
llvm/test/CodeGen/X86/vector-shift-shl-128.ll
llvm/test/CodeGen/X86/vector-shift-shl-256.ll
llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
llvm/test/CodeGen/X86/vector-shuffle-combining.ll
llvm/test/CodeGen/X86/vector-shuffle-sse4a.ll
llvm/test/CodeGen/X86/vector-zext.ll
llvm/test/CodeGen/X86/vsel-cmp-load.ll
llvm/test/CodeGen/X86/vselect-avx.ll
llvm/test/CodeGen/X86/vselect-pcmp.ll
llvm/test/CodeGen/X86/vshift-4.ll
llvm/test/CodeGen/X86/widen_conv-4.ll
llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
llvm/test/CodeGen/X86/xor.ll