Add new pass `X86FixupInstTuning` for fixing up machine-instruction selection.
authorNoah Goldstein <goldstein.w.n@gmail.com>
Thu, 16 Feb 2023 17:57:12 +0000 (11:57 -0600)
committerNoah Goldstein <goldstein.w.n@gmail.com>
Tue, 28 Feb 2023 00:53:25 +0000 (18:53 -0600)
commit69a322fed19b977d15be9500d8653496b73673e9
tree5c8ad7bb7fc9cc54549995ac64b087a4827ca22c
parente56ddae849317a7f41f97a8a9c41cf63ce40e4f8
Add new pass `X86FixupInstTuning` for fixing up machine-instruction selection.

There are a variety of cases where we want more control over the exact
instruction emitted. This commit creates a new pass to fixup
instructions after the DAG has been lowered. The pass is only meant to
replace instructions that are guranteed to be interchangable, not to
do analysis for special cases.

Handling these instruction changes in in X86ISelLowering of
X86ISelDAGToDAG isn't ideal, as its liable to either break existing
patterns that expected a certain instruction or generate infinite
loops.

As well, operating as the MachineInstruction level allows us to access
scheduling/code size information for making the decisions.

Currently only implements `{v}permilps` -> `{v}shufps/{v}shufd` but
more transforms can be added.

Differential Revision: https://reviews.llvm.org/D143787
154 files changed:
llvm/lib/Target/X86/CMakeLists.txt
llvm/lib/Target/X86/X86.h
llvm/lib/Target/X86/X86FixupInstTuning.cpp [new file with mode: 0644]
llvm/lib/Target/X86/X86TargetMachine.cpp
llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
llvm/test/CodeGen/X86/SwizzleShuff.ll
llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll
llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
llvm/test/CodeGen/X86/avx-splat.ll
llvm/test/CodeGen/X86/avx-vbroadcast.ll
llvm/test/CodeGen/X86/avx-vinsertf128.ll
llvm/test/CodeGen/X86/avx-vperm2x128.ll
llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/avx512-cvt.ll
llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll
llvm/test/CodeGen/X86/avx512-shuffles/shuffle.ll
llvm/test/CodeGen/X86/avx512-trunc.ll
llvm/test/CodeGen/X86/avx512-vec-cmp.ll
llvm/test/CodeGen/X86/avx512fp16-mov.ll
llvm/test/CodeGen/X86/avx512fp16-mscatter.ll
llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
llvm/test/CodeGen/X86/buildvec-extract.ll
llvm/test/CodeGen/X86/combine-and.ll
llvm/test/CodeGen/X86/combine-concatvectors.ll
llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
llvm/test/CodeGen/X86/extract-concat.ll
llvm/test/CodeGen/X86/extract-store.ll
llvm/test/CodeGen/X86/fdiv-combine-vec.ll
llvm/test/CodeGen/X86/fmaddsub-combine.ll
llvm/test/CodeGen/X86/haddsub-2.ll
llvm/test/CodeGen/X86/haddsub-4.ll
llvm/test/CodeGen/X86/haddsub-undef.ll
llvm/test/CodeGen/X86/haddsub.ll
llvm/test/CodeGen/X86/horizontal-reduce-smax.ll
llvm/test/CodeGen/X86/horizontal-reduce-smin.ll
llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
llvm/test/CodeGen/X86/horizontal-shuffle-2.ll
llvm/test/CodeGen/X86/horizontal-shuffle-3.ll
llvm/test/CodeGen/X86/horizontal-shuffle-4.ll
llvm/test/CodeGen/X86/horizontal-sum.ll
llvm/test/CodeGen/X86/i64-to-float.ll
llvm/test/CodeGen/X86/insertelement-var-index.ll
llvm/test/CodeGen/X86/known-bits-vector.ll
llvm/test/CodeGen/X86/known-signbits-vector.ll
llvm/test/CodeGen/X86/masked_store.ll
llvm/test/CodeGen/X86/masked_store_trunc.ll
llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
llvm/test/CodeGen/X86/matrix-multiply.ll
llvm/test/CodeGen/X86/oddshuffles.ll
llvm/test/CodeGen/X86/opt-pipeline.ll
llvm/test/CodeGen/X86/packss.ll
llvm/test/CodeGen/X86/palignr.ll
llvm/test/CodeGen/X86/pr31956.ll
llvm/test/CodeGen/X86/pr40730.ll
llvm/test/CodeGen/X86/pr40811.ll
llvm/test/CodeGen/X86/pr50609.ll
llvm/test/CodeGen/X86/rotate_vec.ll
llvm/test/CodeGen/X86/scalarize-fp.ll
llvm/test/CodeGen/X86/shuffle-of-shift.ll
llvm/test/CodeGen/X86/shuffle-of-splat-multiuses.ll
llvm/test/CodeGen/X86/sse-fsignum.ll
llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
llvm/test/CodeGen/X86/sse2.ll
llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
llvm/test/CodeGen/X86/sse41.ll
llvm/test/CodeGen/X86/swizzle-avx2.ll
llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll
llvm/test/CodeGen/X86/tuning-shuffle-permilps.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll
llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll
llvm/test/CodeGen/X86/vec_fp_to_int.ll
llvm/test/CodeGen/X86/vec_int_to_fp.ll
llvm/test/CodeGen/X86/vec_umulo.ll
llvm/test/CodeGen/X86/vector-fshr-256.ll
llvm/test/CodeGen/X86/vector-half-conversions.ll
llvm/test/CodeGen/X86/vector-interleave.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-5.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-8.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-2.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-5.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-8.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-2.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-4.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-5.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll
llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll
llvm/test/CodeGen/X86/vector-reduce-and.ll
llvm/test/CodeGen/X86/vector-reduce-fadd.ll
llvm/test/CodeGen/X86/vector-reduce-fmax.ll
llvm/test/CodeGen/X86/vector-reduce-fmin.ll
llvm/test/CodeGen/X86/vector-reduce-fmul.ll
llvm/test/CodeGen/X86/vector-reduce-or.ll
llvm/test/CodeGen/X86/vector-reduce-smax.ll
llvm/test/CodeGen/X86/vector-reduce-smin.ll
llvm/test/CodeGen/X86/vector-reduce-umax.ll
llvm/test/CodeGen/X86/vector-reduce-umin.ll
llvm/test/CodeGen/X86/vector-reduce-xor.ll
llvm/test/CodeGen/X86/vector-sext.ll
llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
llvm/test/CodeGen/X86/vector-shift-shl-256.ll
llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
llvm/test/CodeGen/X86/vector-shuffle-combining.ll
llvm/test/CodeGen/X86/vector-shuffle-concatenation.ll
llvm/test/CodeGen/X86/vector-trunc-ssat.ll
llvm/test/CodeGen/X86/vector-trunc-usat.ll
llvm/test/CodeGen/X86/vselect-avx.ll
llvm/test/CodeGen/X86/x86-interleaved-access.ll
llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn