[X86] X86FixupInstTunings - add VPERMILPDri -> VSHUFPDrri mapping
authorSimon Pilgrim <llvm-dev@redking.me.uk>
Sun, 23 Apr 2023 10:48:50 +0000 (11:48 +0100)
committerSimon Pilgrim <llvm-dev@redking.me.uk>
Sun, 23 Apr 2023 10:48:50 +0000 (11:48 +0100)
commite9f9467da063875bd684e46660e2ff36ba4f55e2
treec2050da494466a58a5b14c07d002bd6019683cf3
parentb92839c9548a55bc7a3267b05d11c9d9c530b792
[X86] X86FixupInstTunings - add VPERMILPDri -> VSHUFPDrri mapping

Similar to the original VPERMILPSri -> VSHUFPSrri mapping added in D143787, replacing VPERMILPDri -> VSHUFPDrri should never be any slower and saves an encoding byte.

The sibling VPERMILPDmi -> VPSHUFDmi mapping is trickier as we need the same shuffle mask in every lane (and it needs to be adjusted) - I haven't attempted that yet but we can investigate it in the future if there's interest.

Fixes #61060

Differential Revision: https://reviews.llvm.org/D148999
72 files changed:
llvm/lib/Target/X86/X86FixupInstTuning.cpp
llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
llvm/test/CodeGen/X86/avx-vbroadcast.ll
llvm/test/CodeGen/X86/avx512-cvt.ll
llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll
llvm/test/CodeGen/X86/avx512fp16-mov.ll
llvm/test/CodeGen/X86/avx512fp16-mscatter.ll
llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
llvm/test/CodeGen/X86/combine-and.ll
llvm/test/CodeGen/X86/complex-fastmath.ll
llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
llvm/test/CodeGen/X86/extract-concat.ll
llvm/test/CodeGen/X86/fmaddsub-combine.ll
llvm/test/CodeGen/X86/fmf-reduction.ll
llvm/test/CodeGen/X86/haddsub-2.ll
llvm/test/CodeGen/X86/haddsub-3.ll
llvm/test/CodeGen/X86/haddsub-broadcast.ll
llvm/test/CodeGen/X86/haddsub-shuf.ll
llvm/test/CodeGen/X86/haddsub-undef.ll
llvm/test/CodeGen/X86/haddsub.ll
llvm/test/CodeGen/X86/half.ll
llvm/test/CodeGen/X86/horizontal-reduce-fadd.ll
llvm/test/CodeGen/X86/horizontal-sum.ll
llvm/test/CodeGen/X86/known-signbits-vector.ll
llvm/test/CodeGen/X86/load-partial-dot-product.ll
llvm/test/CodeGen/X86/matrix-multiply.ll
llvm/test/CodeGen/X86/oddshuffles.ll
llvm/test/CodeGen/X86/pr40730.ll
llvm/test/CodeGen/X86/scalar-int-to-fp.ll
llvm/test/CodeGen/X86/scalarize-fp.ll
llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll
llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll
llvm/test/CodeGen/X86/tuning-shuffle-permilpd.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
llvm/test/CodeGen/X86/vec_fp_to_int.ll
llvm/test/CodeGen/X86/vector-half-conversions.ll
llvm/test/CodeGen/X86/vector-interleave.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-5.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-4.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-7.ll
llvm/test/CodeGen/X86/vector-narrow-binop.ll
llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll
llvm/test/CodeGen/X86/vector-reduce-fadd.ll
llvm/test/CodeGen/X86/vector-reduce-fmax-fmin-fast.ll
llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
llvm/test/CodeGen/X86/vector-reduce-fmax.ll
llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
llvm/test/CodeGen/X86/vector-reduce-fmin.ll
llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
llvm/test/CodeGen/X86/vector-reduce-fmul.ll
llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
llvm/test/CodeGen/X86/vector-shuffle-combining.ll
llvm/test/CodeGen/X86/x86-interleaved-access.ll