From 7d7577256b76e4293f455b8093504d5f7044ab4b Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Wed, 14 Dec 2022 15:15:18 +0100
Subject: [PATCH] [LoopVectorize] Convert some tests to opaque pointers (NFC)

---
 .../Transforms/LoopVectorize/12-12-11-if-conv.ll | 8 +-
 .../LoopVectorize/2012-10-22-isconsec.ll | 27 +-
 .../AArch64/Oz-and-forced-vectorize.ll | 12 +-
 .../LoopVectorize/AArch64/aarch64-predication.ll | 11 +-
 .../LoopVectorize/AArch64/aarch64-unroll.ll | 16 +-
 .../LoopVectorize/AArch64/arm64-unroll.ll | 16 +-
 .../LoopVectorize/AArch64/backedge-overflow.ll | 72 +-
 .../AArch64/eliminate-tail-predication.ll | 6 +-
 ...vectorization-factor-for-unprofitable-memops.ll | 60 +-
 .../LoopVectorize/AArch64/gather-cost.ll | 34 +-
 .../LoopVectorize/AArch64/induction-trunc.ll | 14 +-
 .../interleaved-store-of-first-order-recurrence.ll | 27 +-
 .../LoopVectorize/AArch64/interleaved-vs-scalar.ll | 14 +-
 .../LoopVectorize/AArch64/interleaved_cost.ll | 182 +--
 .../AArch64/loop-vectorization-factors.ll | 102 +-
 .../AArch64/loopvectorize_pr33804_double.ll | 54 +-
 .../LoopVectorize/AArch64/masked-call.ll | 230 ++--
 .../LoopVectorize/AArch64/masked-op-cost.ll | 28 +-
 .../AArch64/max-vf-for-interleaved.ll | 18 +-
 .../AArch64/no_vector_instructions.ll | 12 +-
 .../outer_loop_test1_no_explicit_vect_width.ll | 32 +-
 .../Transforms/LoopVectorize/AArch64/pr31900.ll | 30 +-
 .../Transforms/LoopVectorize/AArch64/pr33053.ll | 12 +-
 .../Transforms/LoopVectorize/AArch64/pr36032.ll | 22 +-
 .../AArch64/pr46950-load-cast-context-crash.ll | 8 +-
 .../LoopVectorize/AArch64/predication_costs.ll | 72 +-
 .../LoopVectorize/AArch64/reduction-small-size.ll | 30 +-
 .../AArch64/runtime-check-size-based-threshold.ll | 78 +-
 .../LoopVectorize/AArch64/scalable-alloca.ll | 10 +-
 .../LoopVectorize/AArch64/scalable-call.ll | 88 +-
 .../AArch64/scalable-predicate-instruction.ll | 30 +-
 .../AArch64/scalable-reductions-tf.ll | 12 +-
 .../LoopVectorize/AArch64/scalable-reductions.ll | 94 +-
 .../AArch64/scalable-vectorization-cost-tuning.ll | 14 +-
 .../AArch64/scalable-vectorization.ll | 70 +-
 .../LoopVectorize/AArch64/scalable-vf-hint.ll | 110 +-
 .../AArch64/scalarize-store-with-predication.ll | 12 +-
 .../Transforms/LoopVectorize/AArch64/sdiv-pow2.ll | 10 +-
 .../LoopVectorize/AArch64/select-costs.ll | 8 +-
 .../AArch64/smallest-and-widest-types.ll | 18 +-
 .../LoopVectorize/AArch64/strict-fadd-cost.ll | 32 +-
 .../LoopVectorize/AArch64/strict-fadd-vf1.ll | 10 +-
 .../LoopVectorize/AArch64/sve-basic-vec.ll | 42 +-
 .../LoopVectorize/AArch64/sve-cond-inv-loads.ll | 133 +-
 .../AArch64/sve-gather-scatter-cost.ll | 104 +-
 .../LoopVectorize/AArch64/sve-gather-scatter.ll | 173 ++-
 .../LoopVectorize/AArch64/sve-inductions.ll | 28 +-
 .../LoopVectorize/AArch64/sve-inv-loads.ll | 22 +-
 .../LoopVectorize/AArch64/sve-large-strides.ll | 46 +-
 .../LoopVectorize/AArch64/sve-low-trip-count.ll | 40 +-
 .../LoopVectorize/AArch64/sve-masked-loadstore.ll | 44 +-
 .../AArch64/sve-scalable-load-in-loop.ll | 6 +-
 .../LoopVectorize/AArch64/sve-select-cmp.ll | 36 +-
 .../LoopVectorize/AArch64/sve-strict-fadd-cost.ll | 12 +-
 .../LoopVectorize/AArch64/sve-type-conv.ll | 110 +-
 .../AArch64/sve-vector-reverse-mask4.ll | 16 +-
 .../LoopVectorize/AArch64/sve-vector-reverse.ll | 68 +-
 .../AArch64/sve-widen-extractvalue.ll | 6 +-
 .../AArch64/type-shrinkage-insertelt.ll | 14 +-
 .../LoopVectorize/AArch64/unsafe-vf-hint-remark.ll | 14 +-
 .../AArch64/veclib-calls-libsystem-darwin.ll | 300 ++--
 .../LoopVectorize/AArch64/vector-reverse-mask4.ll | 56 +-
 .../AMDGPU/divergent-runtime-check.ll | 10 +-
 .../Transforms/LoopVectorize/AMDGPU/packed-fp32.ll | 6 +-
 .../Transforms/LoopVectorize/AMDGPU/packed-math.ll | 30 +-
 .../AMDGPU/unroll-in-loop-vectorizer.ll | 8 +-
 .../LoopVectorize/ARM/arm-ieee-vectorize.ll | 124 +-
 .../Transforms/LoopVectorize/ARM/arm-unroll.ll | 12 +-
 .../Transforms/LoopVectorize/ARM/gather-cost.ll | 34 +-
 .../Transforms/LoopVectorize/ARM/gcc-examples.ll | 22 +-
 .../LoopVectorize/ARM/interleaved_cost.ll | 144 +-
 .../LoopVectorize/ARM/mve-interleaved-cost.ll | 1428 ++++++++++----------
 .../Transforms/LoopVectorize/ARM/mve-maskedldst.ll | 46 +-
 .../LoopVectorize/ARM/mve-predstorecost.ll | 54 +-
 .../LoopVectorize/ARM/mve-reduction-predselect.ll | 191 ++-
 .../Transforms/LoopVectorize/ARM/mve-reductions.ll | 447 +++---
 .../Transforms/LoopVectorize/ARM/mve-shiftcost.ll | 8 +-
 .../LoopVectorize/ARM/prefer-tail-loop-folding.ll | 232 ++--
 .../LoopVectorize/ARM/scalar-block-cost.ll | 44 +-
 .../LoopVectorize/ARM/tail-fold-multiple-icmps.ll | 19 +-
 .../LoopVectorize/ARM/tail-folding-loop-hint.ll | 38 +-
 .../LoopVectorize/ARM/tail-folding-prefer-flag.ll | 62 +-
 .../ARM/tail-folding-reductions-allowed.ll | 6 +-
 .../Transforms/LoopVectorize/ARM/width-detect.ll | 12 +-
 ...idate-cm-after-invalidating-interleavegroups.ll | 52 +-
 .../LoopVectorize/Hexagon/maximum-vf-crash.ll | 2 +-
 .../Transforms/LoopVectorize/Hexagon/minimum-vf.ll | 96 +-
 .../LoopVectorize/PowerPC/interleave_IC.ll | 42 +-
 ...terleaved-pointer-runtime-check-unprofitable.ll | 84 +-
 .../LoopVectorize/PowerPC/large-loop-rdx.ll | 28 +-
 .../LoopVectorize/PowerPC/massv-altivec.ll | 24 +-
 .../LoopVectorize/PowerPC/massv-calls.ll | 388 +++---
 .../LoopVectorize/PowerPC/massv-nobuiltin.ll | 12 +-
 .../LoopVectorize/PowerPC/massv-unsupported.ll | 24 +-
 .../optimal-epilog-vectorization-profitability.ll | 42 +-
 .../Transforms/LoopVectorize/PowerPC/pr41179.ll | 26 +-
 .../LoopVectorize/PowerPC/small-loop-rdx.ll | 6 +-
 .../LoopVectorize/PowerPC/stride-vectorization.ll | 14 +-
 .../PowerPC/vectorize-only-for-real.ll | 32 +-
 .../LoopVectorize/PowerPC/vsx-tsvc-s173.ll | 18 +-
 .../LoopVectorize/PowerPC/widened-massv-call.ll | 11 +-
 .../PowerPC/widened-massv-vfabi-attr.ll | 11 +-
 .../LoopVectorize/RISCV/riscv-interleaved.ll | 6 +-
 .../LoopVectorize/RISCV/scalable-reductions.ll | 90 +-
 .../LoopVectorize/RISCV/scalable-vf-hint.ll | 14 +-
 .../RISCV/unroll-in-loop-vectorizer.ll | 14 +-
 .../Transforms/LoopVectorize/SystemZ/addressing.ll | 43 +-
 .../SystemZ/branch-for-predicated-block.ll | 8 +-
 .../SystemZ/load-scalarization-cost-1.ll | 8 +-
 .../SystemZ/load-store-scalarization-cost.ll | 16 +-
 .../SystemZ/mem-interleaving-costs-02.ll | 88 +-
 .../SystemZ/mem-interleaving-costs.ll | 46 +-
 .../LoopVectorize/SystemZ/zero_unroll.ll | 4 +-
 .../test/Transforms/LoopVectorize/VE/disable_lv.ll | 10 +-
 .../LoopVectorize/X86/already-vectorized.ll | 4 +-
 llvm/test/Transforms/LoopVectorize/X86/avx1.ll | 16 +-
 llvm/test/Transforms/LoopVectorize/X86/avx512.ll | 18 +-
 .../LoopVectorize/X86/consecutive-ptr-cg-bug.ll | 8 +-
 .../LoopVectorize/X86/constant-vector-operand.ll | 8 +-
 .../Transforms/LoopVectorize/X86/cost-model.ll | 178 +--
 .../LoopVectorize/X86/float-induction-x86.ll | 420 +++---
 .../test/Transforms/LoopVectorize/X86/fneg-cost.ll | 8 +-
 .../LoopVectorize/X86/fp32_to_uint32-cost-model.ll | 8 +-
 .../LoopVectorize/X86/fp64_to_uint32-cost-model.ll | 10 +-
 .../LoopVectorize/X86/fp_to_sint8-cost-model.ll | 10 +-
 llvm/test/Transforms/LoopVectorize/X86/funclet.ll | 10 +-
 .../Transforms/LoopVectorize/X86/gather-cost.ll | 68 +-
.../LoopVectorize/X86/gather-vs-interleave.ll | 8 +- .../Transforms/LoopVectorize/X86/gcc-examples.ll | 22 +- .../LoopVectorize/X86/int128_no_gather.ll | 26 +- .../LoopVectorize/X86/interleave_short_tc.ll | 10 +- .../X86/interleaved-accesses-large-gap.ll | 38 +- .../X86/interleaved-accesses-waw-dependency.ll | 79 +- .../Transforms/LoopVectorize/X86/interleaving.ll | 126 +- .../LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll | 86 +- .../LoopVectorize/X86/libm-vector-calls-finite.ll | 44 +- .../LoopVectorize/X86/libm-vector-calls.ll | 86 +- .../Transforms/LoopVectorize/X86/max-mstore.ll | 10 +- .../LoopVectorize/X86/min-trip-count-switch.ll | 8 +- .../Transforms/LoopVectorize/X86/mul_slm_16bit.ll | 20 +- .../test/Transforms/LoopVectorize/X86/no-vector.ll | 6 +- .../test/Transforms/LoopVectorize/X86/no_fpmath.ll | 12 +- .../LoopVectorize/X86/no_fpmath_with_hotness.ll | 12 +- .../Transforms/LoopVectorize/X86/nontemporal.ll | 38 +- .../X86/outer_loop_test1_no_explicit_vect_width.ll | 24 +- .../X86/parallel-loops-after-reg2mem.ll | 34 +- .../Transforms/LoopVectorize/X86/parallel-loops.ll | 141 +- .../test/Transforms/LoopVectorize/X86/powof2div.ll | 10 +- llvm/test/Transforms/LoopVectorize/X86/pr23997.ll | 78 +- llvm/test/Transforms/LoopVectorize/X86/pr39160.ll | 4 +- llvm/test/Transforms/LoopVectorize/X86/pr42674.ll | 14 +- ...-select-interleave-count-loop-with-cost-zero.ll | 12 +- .../LoopVectorize/X86/reduction-crash.ll | 12 +- .../LoopVectorize/X86/reduction-small-size.ll | 10 +- .../LoopVectorize/X86/redundant-vf2-cost.ll | 8 +- .../LoopVectorize/X86/reg-usage-debug.ll | 14 +- .../test/Transforms/LoopVectorize/X86/reg-usage.ll | 36 +- .../LoopVectorize/X86/register-assumption.ll | 2 +- .../Transforms/LoopVectorize/X86/runtime-limit.ll | 66 +- .../LoopVectorize/X86/slm-no-vectorize.ll | 10 +- .../Transforms/LoopVectorize/X86/struct-store.ll | 4 +- .../LoopVectorize/X86/svml-calls-finite.ll | 92 +- .../Transforms/LoopVectorize/X86/svml-calls.ll | 220 +-- .../X86/tail_folding_and_assume_safety.ll | 42 +- .../test/Transforms/LoopVectorize/X86/tripcount.ll | 6 +- .../LoopVectorize/X86/uint64_to_fp64-cost-model.ll | 10 +- .../Transforms/LoopVectorize/X86/uniform-phi.ll | 38 +- .../Transforms/LoopVectorize/X86/uniform_load.ll | 12 +- .../Transforms/LoopVectorize/X86/uniformshift.ll | 8 +- .../test/Transforms/LoopVectorize/X86/unroll-pm.ll | 8 +- .../LoopVectorize/X86/unroll-small-loops.ll | 30 +- .../LoopVectorize/X86/unroll_selection.ll | 16 +- .../Transforms/LoopVectorize/X86/veclib-calls.ll | 250 ++-- .../Transforms/LoopVectorize/X86/vect.omp.force.ll | 22 +- .../LoopVectorize/X86/vector-scalar-select-cost.ll | 24 +- .../LoopVectorize/X86/vector_max_bandwidth.ll | 36 +- .../LoopVectorize/X86/vector_ptr_load_store.ll | 50 +- .../X86/vectorization-remarks-loopid-dbg.ll | 16 +- .../X86/vectorization-remarks-missed.ll | 36 +- .../X86/vectorization-remarks-profitable.ll | 28 +- .../LoopVectorize/X86/vectorization-remarks.ll | 16 +- .../LoopVectorize/X86/vectorize-only-for-real.ll | 6 +- .../X86/x86-interleaved-accesses-masked-group.ll | 1154 ++++++++-------- .../x86-interleaved-store-accesses-with-gaps.ll | 206 ++- .../Transforms/LoopVectorize/X86/x86-pr39099.ll | 12 +- .../X86/x86_fp80-interleaved-access.ll | 10 +- .../LoopVectorize/X86/x86_fp80-vector-store.ll | 12 +- .../alias-set-with-uncomputable-bounds.ll | 80 +- llvm/test/Transforms/LoopVectorize/align.ll | 18 +- llvm/test/Transforms/LoopVectorize/assume.ll | 47 +- llvm/test/Transforms/LoopVectorize/bsd_regex.ll | 22 +- 
llvm/test/Transforms/LoopVectorize/calloc.ll | 16 +- .../Transforms/LoopVectorize/cast-induction.ll | 4 +- .../Transforms/LoopVectorize/check-prof-info.ll | 20 +- .../LoopVectorize/conditional-assignment.ll | 8 +- llvm/test/Transforms/LoopVectorize/control-flow.ll | 8 +- .../test/Transforms/LoopVectorize/cpp-new-array.ll | 35 +- llvm/test/Transforms/LoopVectorize/dbg.value.ll | 12 +- .../Transforms/LoopVectorize/dead_instructions.ll | 16 +- llvm/test/Transforms/LoopVectorize/debugloc.ll | 50 +- .../demanded-bits-of-pointer-instruction.ll | 8 +- .../LoopVectorize/diag-missing-instr-debug-loc.ll | 8 +- .../LoopVectorize/diag-with-hotness-info-2.ll | 84 +- .../LoopVectorize/diag-with-hotness-info.ll | 84 +- .../Transforms/LoopVectorize/disable_nonforced.ll | 6 +- .../LoopVectorize/disable_nonforced_enable.ll | 6 +- .../test/Transforms/LoopVectorize/discriminator.ll | 18 +- llvm/test/Transforms/LoopVectorize/exact.ll | 8 +- .../LoopVectorize/explicit_outer_detection.ll | 42 +- .../explicit_outer_nonuniform_inner.ll | 32 +- .../explicit_outer_uniform_diverg_branch.ll | 30 +- .../LoopVectorize/extract-last-veclane.ll | 64 +- .../Transforms/LoopVectorize/fix-reduction-dbg.ll | 6 +- llvm/test/Transforms/LoopVectorize/flags.ll | 22 +- .../Transforms/LoopVectorize/float-induction.ll | 504 ++++--- .../Transforms/LoopVectorize/float-reduction.ll | 24 +- llvm/test/Transforms/LoopVectorize/fneg.ll | 12 +- llvm/test/Transforms/LoopVectorize/followup.ll | 6 +- .../Transforms/LoopVectorize/forked-pointers.ll | 118 +- llvm/test/Transforms/LoopVectorize/funcall.ll | 8 +- llvm/test/Transforms/LoopVectorize/gcc-examples.ll | 306 ++--- .../Transforms/LoopVectorize/gep_with_bitcast.ll | 18 +- llvm/test/Transforms/LoopVectorize/hints-trans.ll | 6 +- llvm/test/Transforms/LoopVectorize/hoist-loads.ll | 22 +- llvm/test/Transforms/LoopVectorize/i8-induction.ll | 6 +- .../test/Transforms/LoopVectorize/icmp-uniforms.ll | 12 +- .../test/Transforms/LoopVectorize/if-conv-crash.ll | 4 +- .../LoopVectorize/if-conversion-edgemasks.ll | 26 +- .../LoopVectorize/if-conversion-reduction.ll | 6 +- .../test/Transforms/LoopVectorize/if-conversion.ll | 24 +- .../LoopVectorize/if-pred-not-when-safe.ll | 56 +- llvm/test/Transforms/LoopVectorize/if-reduction.ll | 152 +-- .../Transforms/LoopVectorize/incorrect-dom-info.ll | 12 +- llvm/test/Transforms/LoopVectorize/increment.ll | 18 +- .../induction-multiple-uses-in-same-instruction.ll | 18 +- .../LoopVectorize/induction-unroll-novec.ll | 14 +- llvm/test/Transforms/LoopVectorize/infiniteloop.ll | 6 +- .../Transforms/LoopVectorize/int_sideeffect.ll | 6 +- .../LoopVectorize/interleaved-accesses-1.ll | 23 +- .../LoopVectorize/interleaved-accesses-2.ll | 16 +- .../LoopVectorize/interleaved-accesses-3.ll | 14 +- .../LoopVectorize/interleaved-accesses-alias.ll | 28 +- .../interleaved-accesses-masked-group.ll | 60 +- .../interleaved-accesses-pred-stores.ll | 120 +- .../interleaved-accesses-uniform-load.ll | 14 +- .../interleaved-acess-with-remarks.ll | 34 +- llvm/test/Transforms/LoopVectorize/intrinsic.ll | 650 ++++----- .../Transforms/LoopVectorize/irregular_type.ll | 8 +- .../Transforms/LoopVectorize/libcall-remark.ll | 12 +- llvm/test/Transforms/LoopVectorize/lifetime.ll | 39 +- llvm/test/Transforms/LoopVectorize/loop-scalars.ll | 105 +- .../Transforms/LoopVectorize/loop-vect-memdep.ll | 16 +- .../Transforms/LoopVectorize/memdep-fold-tail.ll | 32 +- llvm/test/Transforms/LoopVectorize/memdep.ll | 96 +- .../Transforms/LoopVectorize/memory-dep-remarks.ll | 108 +- 
.../Transforms/LoopVectorize/metadata-unroll.ll | 6 +- .../Transforms/LoopVectorize/metadata-width.ll | 24 +- llvm/test/Transforms/LoopVectorize/metadata.ll | 14 +- .../Transforms/LoopVectorize/middle-block-dbg.ll | 16 +- llvm/test/Transforms/LoopVectorize/miniters.ll | 12 +- .../Transforms/LoopVectorize/minmax_reduction.ll | 208 +-- .../LoopVectorize/mixed-precision-remarks.ll | 26 +- .../LoopVectorize/multi-use-reduction-bug.ll | 8 +- .../LoopVectorize/multiple-exits-versioning.ll | 24 +- .../Transforms/LoopVectorize/no_array_bounds.ll | 22 +- .../LoopVectorize/no_array_bounds_scalable.ll | 24 +- .../Transforms/LoopVectorize/no_idiv_reduction.ll | 4 +- .../Transforms/LoopVectorize/no_int_induction.ll | 24 +- llvm/test/Transforms/LoopVectorize/no_switch.ll | 10 +- .../no_switch_disable_vectorization.ll | 10 +- .../Transforms/LoopVectorize/noalias-md-licm.ll | 18 +- llvm/test/Transforms/LoopVectorize/noalias-md.ll | 16 +- .../Transforms/LoopVectorize/noalias-scope-decl.ll | 43 +- .../Transforms/LoopVectorize/nofloat-report.ll | 4 +- llvm/test/Transforms/LoopVectorize/nofloat.ll | 4 +- llvm/test/Transforms/LoopVectorize/non-const-n.ll | 12 +- llvm/test/Transforms/LoopVectorize/nontemporal.ll | 14 +- llvm/test/Transforms/LoopVectorize/nounroll.ll | 18 +- .../LoopVectorize/novect-lcssa-cfg-invalidation.ll | 6 +- llvm/test/Transforms/LoopVectorize/nuw.ll | 14 +- llvm/test/Transforms/LoopVectorize/opt.ll | 6 +- .../optimal-epilog-vectorization-limitations.ll | 16 +- .../optimal-epilog-vectorization-scalable.ll | 6 +- llvm/test/Transforms/LoopVectorize/optsize.ll | 68 +- .../outer-loop-vec-phi-predecessor-order.ll | 20 +- .../Transforms/LoopVectorize/outer_loop_test1.ll | 16 +- .../Transforms/LoopVectorize/outer_loop_test2.ll | 24 +- .../test/Transforms/LoopVectorize/partial-lcssa.ll | 18 +- llvm/test/Transforms/LoopVectorize/phi-cost.ll | 28 +- llvm/test/Transforms/LoopVectorize/pr25281.ll | 28 +- llvm/test/Transforms/LoopVectorize/pr28541.ll | 10 +- .../Transforms/LoopVectorize/pr30806-phi-scev.ll | 25 +- llvm/test/Transforms/LoopVectorize/pr31098.ll | 26 +- llvm/test/Transforms/LoopVectorize/pr31190.ll | 8 +- llvm/test/Transforms/LoopVectorize/pr33706.ll | 30 +- llvm/test/Transforms/LoopVectorize/pr34681.ll | 12 +- llvm/test/Transforms/LoopVectorize/pr36311.ll | 6 +- llvm/test/Transforms/LoopVectorize/pr37248.ll | 6 +- llvm/test/Transforms/LoopVectorize/pr39099.ll | 14 +- .../LoopVectorize/pr44488-predication.ll | 16 +- llvm/test/Transforms/LoopVectorize/pr45525.ll | 6 +- .../LoopVectorize/pr45679-fold-tail-by-masking.ll | 164 +-- .../LoopVectorize/pr46525-expander-insertpoint.ll | 8 +- .../pr47343-expander-lcssa-after-cfg-update.ll | 30 +- llvm/test/Transforms/LoopVectorize/pr48832.ll | 4 +- .../LoopVectorize/pr51614-fold-tail-by-masking.ll | 32 +- .../preserve-dbg-loc-and-loop-metadata.ll | 8 +- .../test/Transforms/LoopVectorize/ptr-induction.ll | 12 +- llvm/test/Transforms/LoopVectorize/ptr_loops.ll | 42 +- llvm/test/Transforms/LoopVectorize/read-only.ll | 20 +- .../LoopVectorize/reduction-inloop-cond.ll | 250 ++-- .../LoopVectorize/reduction-inloop-pred.ll | 530 ++++---- .../LoopVectorize/reduction-inloop-uf4.ll | 166 ++- .../Transforms/LoopVectorize/reduction-inloop.ll | 359 +++-- .../Transforms/LoopVectorize/reduction-order.ll | 2 +- .../LoopVectorize/reduction-predselect.ll | 360 ++--- .../test/Transforms/LoopVectorize/reduction-ptr.ll | 20 +- llvm/test/Transforms/LoopVectorize/reduction.ll | 150 +- .../LoopVectorize/remarks-multi-exit-loops.ll | 6 +- 
.../Transforms/LoopVectorize/remove_metadata.ll | 6 +- .../Transforms/LoopVectorize/reverse_induction.ll | 26 +- llvm/test/Transforms/LoopVectorize/reverse_iter.ll | 6 +- .../LoopVectorize/runtime-check-address-space.ll | 68 +- .../runtime-check-pointer-element-type.ll | 14 +- .../runtime-check-readonly-address-space.ll | 70 +- .../LoopVectorize/runtime-check-readonly.ll | 20 +- .../Transforms/LoopVectorize/runtime-drop-crash.ll | 17 +- llvm/test/Transforms/LoopVectorize/safegep.ll | 24 +- .../Transforms/LoopVectorize/same-base-access.ll | 56 +- .../Transforms/LoopVectorize/scalable-assume.ll | 36 +- .../scalable-first-order-recurrence.ll | 61 +- .../LoopVectorize/scalable-inductions.ll | 106 +- .../scalable-loop-unpredicated-body-scalar-tail.ll | 40 +- .../LoopVectorize/scalable-noalias-scope-decl.ll | 36 +- .../LoopVectorize/scalable-reduction-inloop.ll | 10 +- .../LoopVectorize/scalable-trunc-min-bitwidth.ll | 33 +- .../Transforms/LoopVectorize/scalable-vf-hint.ll | 14 +- .../test/Transforms/LoopVectorize/scalar-select.ll | 12 +- .../LoopVectorize/scev-during-mutation.ll | 8 +- .../LoopVectorize/select-cmp-predicated.ll | 44 +- llvm/test/Transforms/LoopVectorize/select-cmp.ll | 54 +- .../test/Transforms/LoopVectorize/simple-unroll.ll | 6 +- .../Transforms/LoopVectorize/skip-iterations.ll | 28 +- llvm/test/Transforms/LoopVectorize/small-loop.ll | 24 +- .../Transforms/LoopVectorize/start-non-zero.ll | 8 +- .../Transforms/LoopVectorize/store-shuffle-bug.ll | 16 +- .../test/Transforms/LoopVectorize/struct_access.ll | 12 +- llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll | 40 +- llvm/test/Transforms/LoopVectorize/tripcount.ll | 48 +- .../Transforms/LoopVectorize/trunc-reductions.ll | 57 +- .../Transforms/LoopVectorize/undef-inst-bug.ll | 4 +- .../unroll-novec-memcheck-metadata.ll | 12 +- llvm/test/Transforms/LoopVectorize/unroll.ll | 6 +- .../Transforms/LoopVectorize/unroll_nonlatch.ll | 26 +- llvm/test/Transforms/LoopVectorize/unroll_novec.ll | 16 +- .../Transforms/LoopVectorize/unsafe-dep-remark.ll | 28 +- .../LoopVectorize/unsafe-vf-hint-remark.ll | 14 +- .../test/Transforms/LoopVectorize/value-ptr-bug.ll | 24 +- .../LoopVectorize/vect-phiscev-sext-trunc.ll | 32 +- llvm/test/Transforms/LoopVectorize/vect.stats.ll | 22 +- llvm/test/Transforms/LoopVectorize/vector-geps.ll | 46 +- .../LoopVectorize/vectorize-pointer-phis.ll | 66 +- .../Transforms/LoopVectorize/vectorizeVFone.ll | 8 +- .../Transforms/LoopVectorize/version-mem-access.ll | 28 +- .../Transforms/LoopVectorize/vplan-dot-printing.ll | 10 +- .../LoopVectorize/vplan-printing-outer-loop.ll | 8 +- .../vplan-sink-scalars-and-merge-vf1.ll | 10 +- .../vplan-stress-test-no-explict-vf.ll | 8 +- .../vplan-widen-select-instruction.ll | 40 +- .../LoopVectorize/vplan_hcfg_stress_test.ll | 10 +- llvm/test/Transforms/LoopVectorize/write-only.ll | 16 +- 371 files changed, 9572 insertions(+), 9995 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll index e1b922f..279d4e8 100644 --- a/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll +++ b/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll @@ -6,15 +6,15 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ;CHECK: icmp eq <4 x i32> ;CHECK: select <4 x i1> ;CHECK: ret i32 -define i32 @foo(i32 %x, i32 %t, i32* nocapture %A) nounwind uwtable ssp { +define i32 @foo(i32 %x, i32 %t, ptr nocapture %A) nounwind uwtable ssp { entry: %cmp10 = icmp sgt i32 %x, 0 br i1 %cmp10, label 
%for.body, label %for.end for.body: ; preds = %entry, %if.end %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 %tobool = icmp eq i32 %0, 0 br i1 %tobool, label %if.end, label %if.then @@ -28,7 +28,7 @@ if.then: ; preds = %for.body if.end: ; preds = %for.body, %if.then %z.0 = phi i32 [ %add1, %if.then ], [ 9, %for.body ] - store i32 %z.0, i32* %arrayidx, align 4 + store i32 %z.0, ptr %arrayidx, align 4 %indvars.iv.next = add nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %x diff --git a/llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll index 7118668..e6ac85d 100644 --- a/llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll +++ b/llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll @@ -8,41 +8,38 @@ module asm "\09.ident\09\22GCC: (GNU) 4.6.3 LLVM: 3.2svn\22" @b = common global [32000 x float] zeroinitializer, align 16 -define i32 @set1ds(i32 %_n, float* nocapture %arr, float %value, i32 %stride) nounwind uwtable { +define i32 @set1ds(i32 %_n, ptr nocapture %arr, float %value, i32 %stride) nounwind uwtable { entry: %0 = icmp sgt i32 %_n, 0 br i1 %0, label %"3.lr.ph", label %"5" "3.lr.ph": ; preds = %entry - %1 = bitcast float* %arr to i8* - %2 = sext i32 %stride to i64 + %1 = sext i32 %stride to i64 br label %"3" "3": ; preds = %"3.lr.ph", %"3" %indvars.iv = phi i64 [ 0, %"3.lr.ph" ], [ %indvars.iv.next, %"3" ] - %3 = shl nsw i64 %indvars.iv, 2 - %4 = getelementptr inbounds i8, i8* %1, i64 %3 - %5 = bitcast i8* %4 to float* - store float %value, float* %5, align 4 - %indvars.iv.next = add i64 %indvars.iv, %2 - %6 = trunc i64 %indvars.iv.next to i32 - %7 = icmp slt i32 %6, %_n - br i1 %7, label %"3", label %"5" + %2 = shl nsw i64 %indvars.iv, 2 + %3 = getelementptr inbounds i8, ptr %arr, i64 %2 + store float %value, ptr %3, align 4 + %indvars.iv.next = add i64 %indvars.iv, %1 + %4 = trunc i64 %indvars.iv.next to i32 + %5 = icmp slt i32 %4, %_n + br i1 %5, label %"3", label %"5" "5": ; preds = %"3", %entry ret i32 0 } -define i32 @init(i8* nocapture %name) unnamed_addr nounwind uwtable { +define i32 @init(ptr nocapture %name) unnamed_addr nounwind uwtable { entry: br label %"3" "3": ; preds = %"3", %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %"3" ] %0 = shl nsw i64 %indvars.iv, 2 - %1 = getelementptr inbounds i8, i8* bitcast (float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 16000) to i8*), i64 %0 - %2 = bitcast i8* %1 to float* - store float -1.000000e+00, float* %2, align 4 + %1 = getelementptr inbounds i8, ptr getelementptr inbounds ([32000 x float], ptr @b, i64 0, i64 16000), i64 %0 + store float -1.000000e+00, ptr %1, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 16000 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll index a7bf052..798a9cf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll @@ -7,7 +7,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple 
= "arm64-apple-ios5.0.0" -define void @foo(float* noalias nocapture %ptrA, float* noalias nocapture readonly %ptrB, i64 %size) { +define void @foo(ptr noalias nocapture %ptrA, ptr noalias nocapture readonly %ptrB, i64 %size) { ; CHECK-LABEL: @foo( ; CHECK: fmul <4 x float> ; @@ -20,12 +20,12 @@ for.cond: ; preds = %for.body, %entry br i1 %exitcond, label %for.cond.cleanup, label %for.body for.body: ; preds = %for.cond - %arrayidx = getelementptr inbounds float, float* %ptrB, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %ptrA, i64 %indvars.iv - %1 = load float, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %ptrB, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %ptrA, i64 %indvars.iv + %1 = load float, ptr %arrayidx2, align 4 %mul3 = fmul float %0, %1 - store float %mul3, float* %arrayidx2, align 4 + store float %mul3, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 br label %for.cond, !llvm.loop !0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll index 5309f03..cc6ef0f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll @@ -16,16 +16,15 @@ target triple = "aarch64--linux-gnu" ; COST: LV: Found an estimated cost of 4 for VF 2 For instruction: %var4 = udiv i64 %var2, %var3 ; ; -define i64 @predicated_udiv_scalarized_operand(i64* %a, i64 %x) optsize { +define i64 @predicated_udiv_scalarized_operand(ptr %a, i64 %x) optsize { ; CHECK-LABEL: @predicated_udiv_scalarized_operand( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[ENTRY]] ], [ [[TMP17:%.*]], [[PRED_UDIV_CONTINUE2]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i64> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] @@ -64,8 +63,8 @@ entry: for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] %r = phi i64 [ 0, %entry ], [ %var6, %for.inc ] - %var0 = getelementptr inbounds i64, i64* %a, i64 %i - %var2 = load i64, i64* %var0, align 4 + %var0 = getelementptr inbounds i64, ptr %a, i64 %i + %var2 = load i64, ptr %var0, align 4 %cond0 = icmp sgt i64 %var2, 0 br i1 %cond0, label %if.then, label %for.inc diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll index b9f97b0..4df0ad9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll @@ -2,7 +2,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; Function Attrs: nounwind -define i32* @array_add(i32* 
noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) { +define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr %c, i32 %size) { ;CHECK-LABEL: array_add ;CHECK: load <4 x i32> ;CHECK: load <4 x i32> @@ -22,13 +22,13 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 - %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv - store i32 %add, i32* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv + store i32 %add, ptr %arrayidx4, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %size @@ -38,5 +38,5 @@ for.end.loopexit: ; preds = %for.body br label %for.end for.end: ; preds = %for.end.loopexit, %entry - ret i32* %c + ret ptr %c } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll index 1e91674..8e963fc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll @@ -2,7 +2,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; Function Attrs: nounwind -define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) { +define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr %c, i32 %size) { ;CHECK-LABEL: array_add ;CHECK: load <4 x i32> ;CHECK: load <4 x i32> @@ -22,13 +22,13 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 - %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv - store i32 %add, i32* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv + store i32 %add, ptr %arrayidx4, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %size @@ -38,5 +38,5 @@ for.end.loopexit: ; preds = %for.body br label %for.end for.end: ; preds = %for.end.loopexit, %entry - ret i32* %c + ret ptr %c } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll index d7d8bc8..c47a630 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll @@ -12,9 +12,9 @@ ; CHECK-LABEL: test_sge ; CHECK-LABEL: vector.scevcheck ; CHECK-LABEL: vector.body -define void @test_sge(i32* noalias %A, - i32* noalias %B, - i32* noalias %C, i32 %N) { +define void @test_sge(ptr noalias %A, + ptr noalias %B, + ptr noalias %C, i32 %N) { entry: %cmp13 = icmp eq i32 %N, 0 br i1 %cmp13, label %for.end, label %for.body.preheader @@ -27,15 +27,15 @@ for.body: %indvars.next = add i16 %indvars.iv, 1 %indvars.ext = zext i16 %indvars.iv to i32 - %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext - %1 = load i32, i32* %arrayidx3, align 4 + %arrayidx = getelementptr inbounds i32, ptr %B, i32 %indvars.ext + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %indvars.ext + %1 = load i32, ptr %arrayidx3, align 4 %mul4 = mul i32 %1, %0 - %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext - store i32 %mul4, i32* %arrayidx7, align 4 + %arrayidx7 = getelementptr inbounds i32, ptr %A, i32 %indvars.ext + store i32 %mul4, ptr %arrayidx7, align 4 %exitcond = icmp sge i32 %indvars.ext, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -50,9 +50,9 @@ for.end: ; CHECK-LABEL: test_uge ; CHECK-LABEL: vector.scevcheck ; CHECK-LABEL: vector.body -define void @test_uge(i32* noalias %A, - i32* noalias %B, - i32* noalias %C, i32 %N, i32 %Offset) { +define void @test_uge(ptr noalias %A, + ptr noalias %B, + ptr noalias %C, i32 %N, i32 %Offset) { entry: %cmp13 = icmp eq i32 %N, 0 br i1 %cmp13, label %for.end, label %for.body.preheader @@ -67,15 +67,15 @@ for.body: %indvars.ext = sext i16 %indvars.iv to i32 %indvars.access = add i32 %Offset, %indvars.ext - %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.access - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.access - %1 = load i32, i32* %arrayidx3, align 4 + %arrayidx = getelementptr inbounds i32, ptr %B, i32 %indvars.access + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %indvars.access + %1 = load i32, ptr %arrayidx3, align 4 %mul4 = add i32 %1, %0 - %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.access - store i32 %mul4, i32* %arrayidx7, align 4 + %arrayidx7 = getelementptr inbounds i32, ptr %A, i32 %indvars.access + store i32 %mul4, ptr %arrayidx7, align 4 %exitcond = icmp uge i32 %indvars.ext, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -90,9 +90,9 @@ for.end: ; CHECK-LABEL: test_ule ; CHECK-LABEL: vector.scevcheck ; CHECK-LABEL: vector.body -define void @test_ule(i32* noalias %A, - i32* noalias %B, - i32* noalias %C, i32 %N, +define void @test_ule(ptr noalias %A, + ptr noalias %B, + ptr noalias %C, i32 %N, i16 %M) { entry: %cmp13 = icmp eq i32 %N, 0 @@ -106,15 +106,15 @@ for.body: %indvars.next = sub i16 %indvars.iv, 1 %indvars.ext = zext i16 %indvars.iv to i32 - %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext - %1 = load i32, i32* %arrayidx3, align 4 + %arrayidx = getelementptr inbounds i32, ptr %B, i32 %indvars.ext + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %indvars.ext + %1 = load i32, ptr %arrayidx3, align 4 %mul4 = mul i32 %1, %0 - %arrayidx7 
= getelementptr inbounds i32, i32* %A, i32 %indvars.ext - store i32 %mul4, i32* %arrayidx7, align 4 + %arrayidx7 = getelementptr inbounds i32, ptr %A, i32 %indvars.ext + store i32 %mul4, ptr %arrayidx7, align 4 %exitcond = icmp ule i32 %indvars.ext, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -129,9 +129,9 @@ for.end: ; CHECK-LABEL: test_sle ; CHECK-LABEL: vector.scevcheck ; CHECK-LABEL: vector.body -define void @test_sle(i32* noalias %A, - i32* noalias %B, - i32* noalias %C, i32 %N, +define void @test_sle(ptr noalias %A, + ptr noalias %B, + ptr noalias %C, i32 %N, i16 %M) { entry: %cmp13 = icmp eq i32 %N, 0 @@ -145,15 +145,15 @@ for.body: %indvars.next = sub i16 %indvars.iv, 1 %indvars.ext = sext i16 %indvars.iv to i32 - %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext - %1 = load i32, i32* %arrayidx3, align 4 + %arrayidx = getelementptr inbounds i32, ptr %B, i32 %indvars.ext + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %indvars.ext + %1 = load i32, ptr %arrayidx3, align 4 %mul4 = mul i32 %1, %0 - %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext - store i32 %mul4, i32* %arrayidx7, align 4 + %arrayidx7 = getelementptr inbounds i32, ptr %A, i32 %indvars.ext + store i32 %mul4, ptr %arrayidx7, align 4 %exitcond = icmp sle i32 %indvars.ext, %N br i1 %exitcond, label %for.end.loopexit, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll index 2236cfe..c0b2073 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll @@ -14,14 +14,14 @@ target triple = "aarch64" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" -define void @f1(i32* %A) #0 { +define void @f1(ptr %A) #0 { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %A, i64 %iv - store i32 1, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %A, i64 %iv + store i32 1, ptr %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp ne i64 %iv.next, 1024 br i1 %exitcond, label %for.body, label %exit diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll index 2a6cb5a..113a361 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll @@ -7,7 +7,7 @@ ; maximum VF for NEON is calculated by 128/size of smallest type in loop. ; And while we don't have an instruction to load 4 x i8, vectorization ; might still be profitable. 
-define void @test_load_i8_store_i32(i8* noalias %src, i32* noalias %dst, i32 %off, i64 %N) { +define void @test_load_i8_store_i32(ptr noalias %src, ptr noalias %dst, i32 %off, i64 %N) { ; CHECK-LABEL: @test_load_i8_store_i32( ; CHECK: <16 x i8> ; @@ -16,12 +16,12 @@ entry: loop: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] - %gep.src = getelementptr inbounds i8, i8* %src, i64 %iv - %lv = load i8, i8* %gep.src, align 1 + %gep.src = getelementptr inbounds i8, ptr %src, i64 %iv + %lv = load i8, ptr %gep.src, align 1 %lv.ext = zext i8 %lv to i32 %add = add i32 %lv.ext, %off - %gep.dst = getelementptr inbounds i32, i32* %dst, i64 %iv - store i32 %add, i32* %gep.dst + %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv + store i32 %add, ptr %gep.dst %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %exit, label %loop @@ -31,7 +31,7 @@ exit: } ; Same as test_load_i8_store_i32, but with types flipped for load and store. -define void @test_load_i32_store_i8(i32* noalias %src, i8* noalias %dst, i32 %off, i64 %N) { +define void @test_load_i32_store_i8(ptr noalias %src, ptr noalias %dst, i32 %off, i64 %N) { ; CHECK-LABEL: @test_load_i32_store_i8( ; CHECK: <16 x i8> ; @@ -40,12 +40,12 @@ entry: loop: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] - %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv - %lv = load i32, i32* %gep.src, align 1 + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %lv = load i32, ptr %gep.src, align 1 %add = add i32 %lv, %off %add.trunc = trunc i32 %add to i8 - %gep.dst = getelementptr inbounds i8, i8* %dst, i64 %iv - store i8 %add.trunc, i8* %gep.dst + %gep.dst = getelementptr inbounds i8, ptr %dst, i64 %iv + store i8 %add.trunc, ptr %gep.dst %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %exit, label %loop @@ -55,7 +55,7 @@ exit: } ; All memory operations use i32, all memory operations are profitable with VF 4. -define void @test_load_i32_store_i32(i32* noalias %src, i32* noalias %dst, i8 %off, i64 %N) { +define void @test_load_i32_store_i32(ptr noalias %src, ptr noalias %dst, i8 %off, i64 %N) { ; CHECK-LABEL: @test_load_i32_store_i32( ; CHECK: vector.body: ; CHECK: <4 x i32> @@ -65,13 +65,13 @@ entry: loop: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] - %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv - %lv = load i32, i32* %gep.src, align 1 + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %lv = load i32, ptr %gep.src, align 1 %lv.trunc = trunc i32 %lv to i8 %add = add i8 %lv.trunc, %off %add.ext = zext i8 %add to i32 - %gep.dst = getelementptr inbounds i32, i32* %dst, i64 %iv - store i32 %add.ext, i32* %gep.dst + %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv + store i32 %add.ext, ptr %gep.dst %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %exit, label %loop @@ -83,7 +83,7 @@ exit: ; Test with loop body that requires a large number of vector registers if the ; vectorization factor is large. Make sure the register estimates limit the ; vectorization factor. 
-define void @test_load_i8_store_i64_large(i8* noalias %src, i64* noalias %dst, i64* noalias %dst.2, i64* noalias %dst.3, i64* noalias %dst.4, i64* noalias %dst.5, i64%off, i64 %off.2, i64 %N) { +define void @test_load_i8_store_i64_large(ptr noalias %src, ptr noalias %dst, ptr noalias %dst.2, ptr noalias %dst.3, ptr noalias %dst.4, ptr noalias %dst.5, i64%off, i64 %off.2, i64 %N) { ; CHECK-LABEL: @test_load_i8_store_i64_large ; CHECK: <8 x i64> ; @@ -92,28 +92,28 @@ entry: loop: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] - %gep.src = getelementptr inbounds i8, i8* %src, i64 %iv - %gep.dst.3 = getelementptr inbounds i64, i64* %dst.3, i64 %iv - %lv.dst.3 = load i64, i64* %gep.dst.3, align 1 - %gep.dst.5 = getelementptr inbounds i64, i64* %dst.5, i64 %iv - %lv.dst.5 = load i64, i64* %gep.dst.3, align 1 + %gep.src = getelementptr inbounds i8, ptr %src, i64 %iv + %gep.dst.3 = getelementptr inbounds i64, ptr %dst.3, i64 %iv + %lv.dst.3 = load i64, ptr %gep.dst.3, align 1 + %gep.dst.5 = getelementptr inbounds i64, ptr %dst.5, i64 %iv + %lv.dst.5 = load i64, ptr %gep.dst.3, align 1 - %lv = load i8, i8* %gep.src, align 1 + %lv = load i8, ptr %gep.src, align 1 %lv.ext = zext i8 %lv to i64 %add = add i64 %lv.ext, %off %add.2 = add i64 %add, %off.2 - %gep.dst = getelementptr inbounds i64, i64* %dst, i64 %iv - %gep.dst.2 = getelementptr inbounds i64, i64* %dst.2, i64 %iv + %gep.dst = getelementptr inbounds i64, ptr %dst, i64 %iv + %gep.dst.2 = getelementptr inbounds i64, ptr %dst.2, i64 %iv %add.3 = add i64 %add.2, %lv.dst.3 %add.4 = add i64 %add.3, %add - %gep.dst.4 = getelementptr inbounds i64, i64* %dst.4, i64 %iv + %gep.dst.4 = getelementptr inbounds i64, ptr %dst.4, i64 %iv %add.5 = add i64 %add.2, %lv.dst.5 - store i64 %add.2, i64* %gep.dst.2 - store i64 %add, i64* %gep.dst - store i64 %add.3, i64* %gep.dst.3 - store i64 %add.4, i64* %gep.dst.4 - store i64 %add.5, i64* %gep.dst.5 + store i64 %add.2, ptr %gep.dst.2 + store i64 %add, ptr %gep.dst + store i64 %add.3, ptr %gep.dst.3 + store i64 %add.4, ptr %gep.dst.4 + store i64 %add.5, ptr %gep.dst.5 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll index 4671c6f..8079c1e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll @@ -30,32 +30,32 @@ for.body: %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ] %add = add i64 %v.055, %offset %mul = mul i64 %add, 3 - %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %mul - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds [512 x float], [512 x float]* @kernel, i64 0, i64 %v.055 - %1 = load float, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %mul + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 %v.055 + %1 = load float, ptr %arrayidx2, align 4 %mul3 = fmul fast float %0, %1 - %arrayidx4 = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i64 0, i64 %v.055 - %2 = load float, float* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 %v.055 + %2 = load float, ptr %arrayidx4, align 4 %mul5 = fmul fast float %mul3, %2 - %arrayidx6 = getelementptr inbounds [512 x float], [512 x float]* @kernel3, 
i64 0, i64 %v.055 - %3 = load float, float* %arrayidx6, align 4 + %arrayidx6 = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 %v.055 + %3 = load float, ptr %arrayidx6, align 4 %mul7 = fmul fast float %mul5, %3 - %arrayidx8 = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i64 0, i64 %v.055 - %4 = load float, float* %arrayidx8, align 4 + %arrayidx8 = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 %v.055 + %4 = load float, ptr %arrayidx8, align 4 %mul9 = fmul fast float %mul7, %4 %add10 = fadd fast float %r.057, %mul9 %arrayidx.sum = add i64 %mul, 1 - %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum - %5 = load float, float* %arrayidx11, align 4 + %arrayidx11 = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %arrayidx.sum + %5 = load float, ptr %arrayidx11, align 4 %mul13 = fmul fast float %1, %5 %mul15 = fmul fast float %2, %mul13 %mul17 = fmul fast float %3, %mul15 %mul19 = fmul fast float %4, %mul17 %add20 = fadd fast float %g.056, %mul19 %arrayidx.sum52 = add i64 %mul, 2 - %arrayidx21 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52 - %6 = load float, float* %arrayidx21, align 4 + %arrayidx21 = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %arrayidx.sum52 + %6 = load float, ptr %arrayidx21, align 4 %mul23 = fmul fast float %1, %6 %mul25 = fmul fast float %2, %mul23 %mul27 = fmul fast float %3, %mul25 @@ -78,8 +78,8 @@ for.end: %r.0.lcssa = phi i8 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] %g.0.lcssa = phi i8 [ %phitmp60, %for.cond.for.end_crit_edge ], [ 0, %entry ] %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ] - store i8 %r.0.lcssa, i8* @r_, align 1 - store i8 %g.0.lcssa, i8* @g_, align 1 - store i8 %b.0.lcssa, i8* @b_, align 1 + store i8 %r.0.lcssa, ptr @r_, align 1 + store i8 %g.0.lcssa, ptr @g_, align 1 + store i8 %b.0.lcssa, ptr @b_, align 1 ret void } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll index 99a5909..fe9631a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll @@ -11,22 +11,22 @@ target triple = "aarch64--linux-gnu" ; CHECK-NEXT: [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 5 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDUCTION]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDUCTION1]] to i32 -; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i32, i32* %dst, i32 [[TMP4]] -; CHECK-NEXT: [[GEP1:%.+]] = getelementptr inbounds i32, i32* %dst, i32 [[TMP5]] -; CHECK-NEXT: store i32 0, i32* [[GEP0]], align 4 -; CHECK-NEXT: store i32 0, i32* [[GEP1]], align 4 +; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i32, ptr %dst, i32 [[TMP4]] +; CHECK-NEXT: [[GEP1:%.+]] = getelementptr inbounds i32, ptr %dst, i32 [[TMP5]] +; CHECK-NEXT: store i32 0, ptr [[GEP0]], align 4 +; CHECK-NEXT: store i32 0, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body ; -define void @non_primary_iv_trunc_free(i64 %n, i32* %dst) { +define void @non_primary_iv_trunc_free(i64 %n, ptr %dst) { entry: br label %for.body for.body: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] %tmp0 = trunc i64 %i to i32 - %gep.dst = getelementptr inbounds i32, i32* %dst, i32 %tmp0 - store i32 0, i32* %gep.dst + %gep.dst = 
getelementptr inbounds i32, ptr %dst, i32 %tmp0 + store i32 0, ptr %gep.dst %i.next = add nuw nsw i64 %i, 5 %cond = icmp slt i64 %i.next, %n br i1 %cond, label %for.body, label %for.end diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll index 5646ce6..7878121 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll @@ -2,26 +2,25 @@ ; In the loop below, both the current and previous values of a first-order ; recurrence are stored in an interleave group. -define void @interleaved_store_first_order_recurrence(i32* noalias %src, i32* %dst) { +define void @interleaved_store_first_order_recurrence(ptr noalias %src, ptr %dst) { ; CHECK-LABEL: @interleaved_store_first_order_recurrence( ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[BROADCAST_SPLAT:%.*]], %vector.body ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[SRC:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC:%.*]], align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 2 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 -2 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <12 x i32>* +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -2 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> [[TMP2]], <8 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> [[TMP10]], <12 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i32> [[TMP11]], <12 x i32> poison, <12 x i32> -; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP12]], label %middle.block, label %vector.body @@ -32,14 +31,14 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %for = phi i32 [ 99, %entry ],[ %for.next, %loop ] - %for.next = load i32, i32* %src, align 4 + %for.next = load i32, ptr %src, align 4 %off = mul nuw nsw i64 %iv, 3 - %gep.1 = getelementptr inbounds i32, i32* %dst, i64 %off - store i32 0, i32* %gep.1, align 4 - %gep.2 = getelementptr inbounds i32, i32* %gep.1, i64 1 - store i32 %for, i32* %gep.2, align 4 - %gep.3 = 
getelementptr inbounds i32, i32* %gep.1, i64 2 - store i32 %for.next, i32* %gep.3, align 4 + %gep.1 = getelementptr inbounds i32, ptr %dst, i64 %off + store i32 0, ptr %gep.1, align 4 + %gep.2 = getelementptr inbounds i32, ptr %gep.1, i64 1 + store i32 %for, ptr %gep.2, align 4 + %gep.3 = getelementptr inbounds i32, ptr %gep.1, i64 2 + store i32 %for.next, ptr %gep.3, align 4 %iv.next = add nuw nsw i64 %iv, 1 %ec = icmp eq i64 %iv.next, 1000 br i1 %ec, label %exit, label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll index 4add3f6..0184daf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll @@ -17,19 +17,19 @@ target triple = "aarch64--linux-gnu" ; CHECK: insertelement <2 x i8> [[INSERT]], i8 [[LOAD2]], i32 1 ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body -define void @test(%pair* %p, i8* %q, i64 %n) { +define void @test(ptr %p, ptr %q, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr %pair, %pair* %p, i64 %i, i32 0 - %tmp1 = load i8, i8* %tmp0, align 1 - %tmp2 = getelementptr %pair, %pair* %p, i64 %i, i32 1 - %tmp3 = load i8, i8* %tmp2, align 1 + %tmp0 = getelementptr %pair, ptr %p, i64 %i, i32 0 + %tmp1 = load i8, ptr %tmp0, align 1 + %tmp2 = getelementptr %pair, ptr %p, i64 %i, i32 1 + %tmp3 = load i8, ptr %tmp2, align 1 %add = add i8 %tmp1, %tmp3 - %qi = getelementptr i8, i8* %q, i64 %i - store i8 %add, i8* %qi, align 1 + %qi = getelementptr i8, ptr %q, i64 %i + store i8 %add, ptr %qi, align 1 %i.next = add nuw nsw i64 %i, 1 %cond = icmp eq i64 %i.next, %n br i1 %cond, label %for.end, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll index 0d145ef..c61ddbc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll @@ -8,28 +8,28 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnueabi" %i8.2 = type {i8, i8} -define void @i8_factor_2(%i8.2* %data, i64 %n) { +define void @i8_factor_2(ptr %data, i64 %n) { entry: br label %for.body ; VF_8-LABEL: Checking a loop in 'i8_factor_2' -; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1 +; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp0, align 1 +; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 0, ptr %tmp1, align 1 ; VF_16-LABEL: Checking a loop in 'i8_factor_2' -; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost 
of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 2 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1 +; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp0, align 1 +; VF_16-NEXT: Found an estimated cost of 2 for VF 16 For instruction: store i8 0, ptr %tmp1, align 1 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 0 - %tmp1 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 1 - %tmp2 = load i8, i8* %tmp0, align 1 - %tmp3 = load i8, i8* %tmp1, align 1 - store i8 0, i8* %tmp0, align 1 - store i8 0, i8* %tmp1, align 1 + %tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0 + %tmp1 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 1 + %tmp2 = load i8, ptr %tmp0, align 1 + %tmp3 = load i8, ptr %tmp1, align 1 + store i8 0, ptr %tmp0, align 1 + store i8 0, ptr %tmp1, align 1 %i.next = add nuw nsw i64 %i, 1 %cond = icmp slt i64 %i.next, %n br i1 %cond, label %for.body, label %for.end @@ -39,33 +39,33 @@ for.end: } %i16.2 = type {i16, i16} -define void @i16_factor_2(%i16.2* %data, i64 %n) { +define void @i16_factor_2(ptr %data, i64 %n) { entry: br label %for.body ; VF_4-LABEL: Checking a loop in 'i16_factor_2' -; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2 +; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 +; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 +; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp0, align 2 +; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i16 0, ptr %tmp1, align 2 ; VF_8-LABEL: Checking a loop in 'i16_factor_2' -; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2 +; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp0, align 2 +; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i16 0, ptr %tmp1, align 2 ; VF_16-LABEL: Checking a loop in 'i16_factor_2' -; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: 
store i16 0, i16* %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2 +; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp0, align 2 +; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i16 0, ptr %tmp1, align 2 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 0 - %tmp1 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 1 - %tmp2 = load i16, i16* %tmp0, align 2 - %tmp3 = load i16, i16* %tmp1, align 2 - store i16 0, i16* %tmp0, align 2 - store i16 0, i16* %tmp1, align 2 + %tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0 + %tmp1 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 1 + %tmp2 = load i16, ptr %tmp0, align 2 + %tmp3 = load i16, ptr %tmp1, align 2 + store i16 0, ptr %tmp0, align 2 + store i16 0, ptr %tmp1, align 2 %i.next = add nuw nsw i64 %i, 1 %cond = icmp slt i64 %i.next, %n br i1 %cond, label %for.body, label %for.end @@ -75,38 +75,38 @@ for.end: } %i32.2 = type {i32, i32} -define void @i32_factor_2(%i32.2* %data, i64 %n) { +define void @i32_factor_2(ptr %data, i64 %n) { entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i32_factor_2' -; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4 +; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 +; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 +; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp0, align 4 +; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i32 0, ptr %tmp1, align 4 ; VF_4-LABEL: Checking a loop in 'i32_factor_2' -; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4 +; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 +; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 +; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp0, align 4 +; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i32 0, ptr %tmp1, align 4 ; VF_8-LABEL: Checking a loop in 'i32_factor_2' -; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, 
i32* %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4 +; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp0, align 4 +; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i32 0, ptr %tmp1, align 4 ; VF_16-LABEL: Checking a loop in 'i32_factor_2' -; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4 +; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp0, align 4 +; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i32 0, ptr %tmp1, align 4 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 0 - %tmp1 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 1 - %tmp2 = load i32, i32* %tmp0, align 4 - %tmp3 = load i32, i32* %tmp1, align 4 - store i32 0, i32* %tmp0, align 4 - store i32 0, i32* %tmp1, align 4 + %tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0 + %tmp1 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 1 + %tmp2 = load i32, ptr %tmp0, align 4 + %tmp3 = load i32, ptr %tmp1, align 4 + store i32 0, ptr %tmp0, align 4 + store i32 0, ptr %tmp1, align 4 %i.next = add nuw nsw i64 %i, 1 %cond = icmp slt i64 %i.next, %n br i1 %cond, label %for.body, label %for.end @@ -116,38 +116,38 @@ for.end: } %i64.2 = type {i64, i64} -define void @i64_factor_2(%i64.2* %data, i64 %n) { +define void @i64_factor_2(ptr %data, i64 %n) { entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i64_factor_2' -; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8 ; VF_4-LABEL: Checking a loop in 'i64_factor_2' -; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* 
%tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i64 0, ptr %tmp1, align 8 ; VF_8-LABEL: Checking a loop in 'i64_factor_2' -; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store i64 0, ptr %tmp1, align 8 ; VF_16-LABEL: Checking a loop in 'i64_factor_2' -; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store i64 0, ptr %tmp1, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 0 - %tmp1 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 1 - %tmp2 = load i64, i64* %tmp0, align 8 - %tmp3 = load i64, i64* %tmp1, align 8 - store i64 0, i64* %tmp0, align 8 - store i64 0, i64* %tmp1, align 8 + %tmp0 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 0 + %tmp1 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 1 + %tmp2 = load i64, ptr %tmp0, align 8 + %tmp3 = load i64, ptr %tmp1, align 8 + store i64 0, ptr %tmp0, align 8 + store i64 0, ptr %tmp1, align 8 %i.next = add nuw nsw i64 %i, 1 %cond = icmp slt i64 %i.next, %n br i1 %cond, label %for.body, label %for.end @@ -157,7 +157,7 @@ for.end: } %i64.8 = type {i64, i64, i64, i64, i64, i64, i64, i64} -define void @i64_factor_8(%i64.8* %data, i64 %n) { +define void @i64_factor_8(ptr %data, i64 %n) { entry: br label %for.body @@ -168,18 +168,18 @@ entry: ; gaps. 
; ; VF_2-LABEL: Checking a loop in 'i64_factor_8' -; VF_2: Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_2: Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr inbounds %i64.8, %i64.8* %data, i64 %i, i32 2 - %tmp1 = getelementptr inbounds %i64.8, %i64.8* %data, i64 %i, i32 6 - %tmp2 = load i64, i64* %tmp0, align 8 - %tmp3 = load i64, i64* %tmp1, align 8 - store i64 0, i64* %tmp0, align 8 - store i64 0, i64* %tmp1, align 8 + %tmp0 = getelementptr inbounds %i64.8, ptr %data, i64 %i, i32 2 + %tmp1 = getelementptr inbounds %i64.8, ptr %data, i64 %i, i32 6 + %tmp2 = load i64, ptr %tmp0, align 8 + %tmp3 = load i64, ptr %tmp1, align 8 + store i64 0, ptr %tmp0, align 8 + store i64 0, ptr %tmp1, align 8 %i.next = add nuw nsw i64 %i, 1 %cond = icmp slt i64 %i.next, %n br i1 %cond, label %for.body, label %for.end diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index 63d83a2..e35bc97 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -4,11 +4,11 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64" ; CHECK-LABEL: @add_a( -; CHECK: load <16 x i8>, <16 x i8>* +; CHECK: load <16 x i8>, ptr ; CHECK: add <16 x i8> ; CHECK: store <16 x i8> ; Function Attrs: nounwind -define void @add_a(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 { +define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { entry: %cmp8 = icmp sgt i32 %len, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup @@ -18,13 +18,13 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv - %0 = load i8, i8* %arrayidx + %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv + %0 = load i8, ptr %arrayidx %conv = zext i8 %0 to i32 %add = add nuw nsw i32 %conv, 2 %conv1 = trunc i32 %add to i8 - %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv - store i8 %conv1, i8* %arrayidx3 + %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv + store i8 %conv1, ptr %arrayidx3 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -34,11 +34,11 @@ for.body: ; preds = %entry, %for.body ; Ensure that we preserve nuw/nsw if we're not shrinking the values we're ; working with. 
; CHECK-LABEL: @add_a1( -; CHECK: load <16 x i8>, <16 x i8>* +; CHECK: load <16 x i8>, ptr ; CHECK: add nuw nsw <16 x i8> ; CHECK: store <16 x i8> ; Function Attrs: nounwind -define void @add_a1(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 { +define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { entry: %cmp8 = icmp sgt i32 %len, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup @@ -48,11 +48,11 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv - %0 = load i8, i8* %arrayidx + %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv + %0 = load i8, ptr %arrayidx %add = add nuw nsw i8 %0, 2 - %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv - store i8 %add, i8* %arrayidx3 + %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv + store i8 %add, ptr %arrayidx3 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -60,11 +60,11 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: @add_b( -; CHECK: load <8 x i16>, <8 x i16>* +; CHECK: load <8 x i16>, ptr ; CHECK: add <8 x i16> ; CHECK: store <8 x i16> ; Function Attrs: nounwind -define void @add_b(i16* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 { +define void @add_b(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { entry: %cmp9 = icmp sgt i32 %len, 0 br i1 %cmp9, label %for.body, label %for.cond.cleanup @@ -74,13 +74,13 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv - %0 = load i16, i16* %arrayidx + %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv + %0 = load i16, ptr %arrayidx %conv8 = zext i16 %0 to i32 %add = add nuw nsw i32 %conv8, 2 %conv1 = trunc i32 %add to i16 - %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv - store i16 %conv1, i16* %arrayidx3 + %arrayidx3 = getelementptr inbounds i16, ptr %q, i64 %indvars.iv + store i16 %conv1, ptr %arrayidx3 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -88,11 +88,11 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: @add_c( -; CHECK: load <8 x i8>, <8 x i8>* +; CHECK: load <8 x i8>, ptr ; CHECK: add <8 x i16> ; CHECK: store <8 x i16> ; Function Attrs: nounwind -define void @add_c(i8* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 { +define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { entry: %cmp8 = icmp sgt i32 %len, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup @@ -102,13 +102,13 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv - %0 = load i8, i8* %arrayidx + %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv + %0 = load i8, ptr %arrayidx %conv = zext i8 %0 to i32 %add = add nuw nsw i32 %conv, 2 %conv1 = trunc i32 %add to i16 - %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv - store i16 %conv1, 
i16* %arrayidx3 + %arrayidx3 = getelementptr inbounds i16, ptr %q, i64 %indvars.iv + store i16 %conv1, ptr %arrayidx3 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -119,7 +119,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: load <8 x i16> ; CHECK: add nsw <8 x i32> ; CHECK: store <8 x i32> -define void @add_d(i16* noalias nocapture readonly %p, i32* noalias nocapture %q, i32 %len) #0 { +define void @add_d(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { entry: %cmp7 = icmp sgt i32 %len, 0 br i1 %cmp7, label %for.body, label %for.cond.cleanup @@ -129,12 +129,12 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv - %0 = load i16, i16* %arrayidx + %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv + %0 = load i16, ptr %arrayidx %conv = sext i16 %0 to i32 %add = add nsw i32 %conv, 2 - %arrayidx2 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv - store i32 %add, i32* %arrayidx2 + %arrayidx2 = getelementptr inbounds i32, ptr %q, i64 %indvars.iv + store i32 %add, ptr %arrayidx2 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -151,7 +151,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: xor <16 x i8> ; CHECK: mul <16 x i8> ; CHECK: store <16 x i8> -define void @add_e(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 { +define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 { entry: %cmp.32 = icmp sgt i32 %len, 0 br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup @@ -166,8 +166,8 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %for.body, %for.body.lr.ph %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv - %0 = load i8, i8* %arrayidx + %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv + %0 = load i8, ptr %arrayidx %conv = zext i8 %0 to i32 %add = shl i32 %conv, 4 %conv2 = add nuw nsw i32 %add, 32 @@ -178,8 +178,8 @@ for.body: ; preds = %for.body, %for.body %conv17 = xor i32 %mul.masked, %conv11 %mul18 = mul nuw nsw i32 %conv17, %and %conv19 = trunc i32 %mul18 to i8 - %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv - store i8 %conv19, i8* %arrayidx21 + %arrayidx21 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv + store i8 %conv19, ptr %arrayidx21 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -197,7 +197,7 @@ for.body: ; preds = %for.body, %for.body ; CHECK: xor <8 x i8> ; CHECK: mul <8 x i8> ; CHECK: store <8 x i8> -define void @add_f(i16* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 { +define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 { entry: %cmp.32 = icmp sgt i32 %len, 0 br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup @@ -212,8 +212,8 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %for.body, %for.body.lr.ph %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] - 
%arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv - %0 = load i16, i16* %arrayidx + %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv + %0 = load i16, ptr %arrayidx %conv = sext i16 %0 to i32 %add = shl i32 %conv, 4 %conv2 = add nsw i32 %add, 32 @@ -225,8 +225,8 @@ for.body: ; preds = %for.body, %for.body %conv17 = xor i32 %mul.masked, %conv11 %mul18 = mul nuw nsw i32 %conv17, %and %conv19 = trunc i32 %mul18 to i8 - %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv - store i8 %conv19, i8* %arrayidx21 + %arrayidx21 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv + store i8 %conv19, ptr %arrayidx21 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -234,11 +234,11 @@ for.body: ; preds = %for.body, %for.body } ; CHECK-LABEL: @add_phifail( -; CHECK: load <16 x i8>, <16 x i8>* +; CHECK: load <16 x i8>, ptr ; CHECK: add nuw nsw <16 x i32> ; CHECK: store <16 x i8> ; Function Attrs: nounwind -define void @add_phifail(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 { +define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { entry: %cmp8 = icmp sgt i32 %len, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup @@ -249,13 +249,13 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv - %0 = load i8, i8* %arrayidx + %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv + %0 = load i8, ptr %arrayidx %conv = zext i8 %0 to i32 %add = add nuw nsw i32 %conv, 2 %conv1 = trunc i32 %add to i8 - %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv - store i8 %conv1, i8* %arrayidx3 + %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv + store i8 %conv1, ptr %arrayidx3 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -267,10 +267,10 @@ for.body: ; preds = %entry, %for.body ; even when %len exactly divides VF (since we extract from the second last index ; and pass this to the for.cond.cleanup block). 
Vectorized loop returns ; the correct value a_phi = p[len -2] -define i8 @add_phifail2(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 { +define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { ; CHECK-LABEL: @add_phifail2( ; CHECK: vector.body: -; CHECK: %wide.load = load <16 x i8>, <16 x i8>* +; CHECK: %wide.load = load <16 x i8>, ptr ; CHECK: %[[L1:.+]] = zext <16 x i8> %wide.load to <16 x i32> ; CHECK: add nuw nsw <16 x i32> ; CHECK: store <16 x i8> @@ -293,13 +293,13 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv - %0 = load i8, i8* %arrayidx + %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv + %0 = load i8, ptr %arrayidx %conv = zext i8 %0 to i32 %add = add nuw nsw i32 %conv, 2 %conv1 = trunc i32 %add to i8 - %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv - store i8 %conv1, i8* %arrayidx3 + %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv + store i8 %conv1, ptr %arrayidx3 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loopvectorize_pr33804_double.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loopvectorize_pr33804_double.ll index b1f0b27..263da07 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loopvectorize_pr33804_double.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loopvectorize_pr33804_double.ll @@ -10,22 +10,21 @@ source_filename = "bugpoint-output-26dbd81.bc" target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" -%struct.CvNode1D = type { double, %struct.CvNode1D* } +%struct.CvNode1D = type { double, ptr } ; CHECK-LABEL: @cvCalcEMD2 ; CHECK: vector.body -; CHECK: store <{{[0-9]+}} x %struct.CvNode1D*> -define void @cvCalcEMD2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK: store <{{[0-9]+}} x ptr> +define void @cvCalcEMD2() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { entry: br label %for.body14.i.i for.body14.i.i: ; preds = %for.body14.i.i, %entry %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ] - %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i - %val.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* %arrayidx15.i.i1427, i32 0, i32 0 - store double 0xC415AF1D80000000, double* %val.i.i, align 4 - %next19.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i, i32 1 - store %struct.CvNode1D* undef, %struct.CvNode1D** %next19.i.i, align 4 + %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, ptr undef, i32 %i.1424.i.i + store double 0xC415AF1D80000000, ptr %arrayidx15.i.i1427, align 4 + %next19.i.i = getelementptr inbounds %struct.CvNode1D, ptr undef, i32 %i.1424.i.i, i32 1 + store ptr undef, ptr %next19.i.i, align 4 %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1 %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0 br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i @@ -36,22 +35,22 @@ for.end22.i.i: ; preds = %for.body14.i.i ; This test checks when a pointer value is stored into a double type. 
-%struct.CvNode1D2 = type { %struct.CvNode1D2*, double } +%struct.CvNode1D2 = type { ptr, double } ; CHECK-LABEL: @cvCalcEMD2_2 ; CHECK: vector.body ; CHECK: store <{{[0-9]+}} x double> -define void @cvCalcEMD2_2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @cvCalcEMD2_2() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { entry: br label %for.body14.i.i for.body14.i.i: ; preds = %for.body14.i.i, %entry %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ] - %next19.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i, i32 0 - store %struct.CvNode1D2* undef, %struct.CvNode1D2** %next19.i.i, align 4 - %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i - %val.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* %arrayidx15.i.i1427, i32 0, i32 1 - store double 0xC415AF1D80000000, double* %val.i.i, align 4 + %next19.i.i = getelementptr inbounds %struct.CvNode1D2, ptr undef, i32 %i.1424.i.i, i32 0 + store ptr undef, ptr %next19.i.i, align 4 + %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, ptr undef, i32 %i.1424.i.i + %val.i.i = getelementptr inbounds %struct.CvNode1D2, ptr %arrayidx15.i.i1427, i32 0, i32 1 + store double 0xC415AF1D80000000, ptr %val.i.i, align 4 %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1 %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0 br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i @@ -65,17 +64,16 @@ for.end22.i.i: ; preds = %for.body14.i.i ; CHECK-LABEL: @cvCalcEMD3 ; CHECK: vector.body ; CHECK: inttoptr <{{[0-9]+}} x i64> -define void @cvCalcEMD3() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @cvCalcEMD3() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { entry: br label %for.body14.i.i for.body14.i.i: ; preds = %for.body14.i.i, %entry %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ] - %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i - %val.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* %arrayidx15.i.i1427, i32 0, i32 0 - %load_d = load double, double* %val.i.i, align 4 - %next19.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i, i32 1 - %load_p = load %struct.CvNode1D*, %struct.CvNode1D** %next19.i.i, align 4 + %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, ptr undef, i32 %i.1424.i.i + %load_d = load double, ptr %arrayidx15.i.i1427, align 4 + %next19.i.i = getelementptr inbounds %struct.CvNode1D, ptr undef, i32 %i.1424.i.i, i32 1 + %load_p = load ptr, ptr %next19.i.i, align 4 %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1 %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0 br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i @@ -88,18 +86,18 @@ for.end22.i.i: ; preds = %for.body14.i.i ; CHECK-LABEL: @cvCalcEMD3_2 ; CHECK: vector.body -; CHECK: ptrtoint <{{[0-9]+}} x %struct.CvNode1D2*> -define void @cvCalcEMD3_2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK: ptrtoint <{{[0-9]+}} x ptr> +define void @cvCalcEMD3_2() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { entry: br label %for.body14.i.i for.body14.i.i: ; preds = %for.body14.i.i, %entry %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ] - %next19.i.i = getelementptr inbounds %struct.CvNode1D2, 
%struct.CvNode1D2* undef, i32 %i.1424.i.i, i32 0 - %load_p = load %struct.CvNode1D2*, %struct.CvNode1D2** %next19.i.i, align 4 - %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i - %val.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* %arrayidx15.i.i1427, i32 0, i32 1 - %load_d = load double, double* %val.i.i, align 4 + %next19.i.i = getelementptr inbounds %struct.CvNode1D2, ptr undef, i32 %i.1424.i.i, i32 0 + %load_p = load ptr, ptr %next19.i.i, align 4 + %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, ptr undef, i32 %i.1424.i.i + %val.i.i = getelementptr inbounds %struct.CvNode1D2, ptr %arrayidx15.i.i1427, i32 0, i32 1 + %load_d = load double, ptr %val.i.i, align 4 %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1 %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0 br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index eb96768..d1e9f25 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -7,7 +7,7 @@ target triple = "aarch64-unknown-linux-gnu" ; A call whose argument must be widened. We check that tail folding uses the ; primary mask, and that without tail folding we synthesize an all-true mask. -define void @test_widen(i64* noalias %a, i64* readnone %b) #4 { +define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-LABEL: @test_widen( ; TFNONE-NEXT: entry: ; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -15,18 +15,16 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 { ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TFNONE-NEXT: [[TMP0:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]] -; TFNONE-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>* -; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4 +; TFNONE-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4 ; TFNONE-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0 ; TFNONE-NEXT: [[TMP3:%.*]] = call i64 @foo(i64 [[TMP2]]) #[[ATTR2:[0-9]+]] ; TFNONE-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1 ; TFNONE-NEXT: [[TMP5:%.*]] = call i64 @foo(i64 [[TMP4]]) #[[ATTR2]] ; TFNONE-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0 ; TFNONE-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[TMP5]], i32 1 -; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] -; TFNONE-NEXT: [[TMP9:%.*]] = bitcast i64* [[TMP8]] to <2 x i64>* -; TFNONE-NEXT: store <2 x i64> [[TMP7]], <2 x i64>* [[TMP9]], align 4 +; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; TFNONE-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP8]], align 4 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; TFNONE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TFNONE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -37,11 +35,11 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 { ; TFNONE-NEXT: br label [[FOR_BODY:%.*]] ; TFNONE: for.body: ; TFNONE-NEXT: 
[[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4 +; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] +; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4 ; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR2]] -; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4 +; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] @@ -53,11 +51,11 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 { ; TFALWAYS-NEXT: br label [[FOR_BODY:%.*]] ; TFALWAYS: for.body: ; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4 +; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4 ; TFALWAYS-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]] -; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFALWAYS-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4 +; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -71,18 +69,16 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]] ; TFFALLBACK: vector.body: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[TMP0:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>* -; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4 +; TFFALLBACK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4 ; TFFALLBACK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0 ; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @foo(i64 [[TMP2]]) #[[ATTR2:[0-9]+]] ; TFFALLBACK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1 ; TFFALLBACK-NEXT: [[TMP5:%.*]] = call i64 @foo(i64 [[TMP4]]) #[[ATTR2]] ; TFFALLBACK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0 ; TFFALLBACK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[TMP5]], i32 1 -; TFFALLBACK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[TMP9:%.*]] = bitcast i64* [[TMP8]] to <2 x i64>* -; 
TFFALLBACK-NEXT: store <2 x i64> [[TMP7]], <2 x i64>* [[TMP9]], align 4 +; TFFALLBACK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; TFFALLBACK-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP8]], align 4 ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; TFFALLBACK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TFFALLBACK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -93,11 +89,11 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]] ; TFFALLBACK: for.body: ; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4 +; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4 ; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR2]] -; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4 +; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] @@ -109,11 +105,11 @@ entry: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gep = getelementptr i64, i64* %b, i64 %indvars.iv - %load = load i64, i64* %gep + %gep = getelementptr i64, ptr %b, i64 %indvars.iv + %load = load i64, ptr %gep %call = call i64 @foo(i64 %load) #1 - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv - store i64 %call, i64* %arrayidx + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + store i64 %call, ptr %arrayidx %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1024 br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -123,14 +119,14 @@ for.cond.cleanup: } ; Check that a simple conditional call can be vectorized. 
-define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 { +define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-LABEL: @test_if_then( ; TFNONE-NEXT: entry: ; TFNONE-NEXT: br label [[FOR_BODY:%.*]] ; TFNONE: for.body: ; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFNONE-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50 ; TFNONE-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]] ; TFNONE: if.then: @@ -138,8 +134,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 { ; TFNONE-NEXT: br label [[IF_END]] ; TFNONE: if.end: ; TFNONE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] -; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: store i64 [[TMP2]], i64* [[ARRAYIDX1]], align 8 +; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFNONE-NEXT: store i64 [[TMP2]], ptr [[ARRAYIDX1]], align 8 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -151,8 +147,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 { ; TFALWAYS-NEXT: br label [[FOR_BODY:%.*]] ; TFALWAYS: for.body: ; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFALWAYS-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; TFALWAYS-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50 ; TFALWAYS-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]] ; TFALWAYS: if.then: @@ -160,8 +156,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 { ; TFALWAYS-NEXT: br label [[IF_END]] ; TFALWAYS: if.end: ; TFALWAYS-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] -; TFALWAYS-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFALWAYS-NEXT: store i64 [[TMP2]], i64* [[ARRAYIDX1]], align 8 +; TFALWAYS-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: store i64 [[TMP2]], ptr [[ARRAYIDX1]], align 8 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -173,8 +169,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]] ; TFFALLBACK: for.body: ; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: 
[[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; TFFALLBACK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50 ; TFFALLBACK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]] ; TFFALLBACK: if.then: @@ -182,8 +178,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[IF_END]] ; TFFALLBACK: if.end: ; TFFALLBACK-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] -; TFFALLBACK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: store i64 [[TMP2]], i64* [[ARRAYIDX1]], align 8 +; TFFALLBACK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: store i64 [[TMP2]], ptr [[ARRAYIDX1]], align 8 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -195,8 +191,8 @@ entry: for.body: %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + %0 = load i64, ptr %arrayidx, align 8 %cmp = icmp ugt i64 %0, 50 br i1 %cmp, label %if.then, label %if.end @@ -206,8 +202,8 @@ if.then: if.end: %2 = phi i64 [%1, %if.then], [0, %for.body] - %arrayidx1 = getelementptr inbounds i64, i64* %b, i64 %indvars.iv - store i64 %2, i64* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %indvars.iv + store i64 %2, ptr %arrayidx1, align 8 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1024 br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -220,14 +216,14 @@ for.cond.cleanup: ; calls inside the conditional blocks. Although one of the calls has a ; uniform parameter and the metadata lists a uniform variant, right now ; we just see a splat of the parameter instead. More work needed. 
-define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 { +define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-LABEL: @test_widen_if_then_else( ; TFNONE-NEXT: entry: ; TFNONE-NEXT: br label [[FOR_BODY:%.*]] ; TFNONE: for.body: ; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFNONE-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50 ; TFNONE-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; TFNONE: if.then: @@ -238,8 +234,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 { ; TFNONE-NEXT: br label [[IF_END]] ; TFNONE: if.end: ; TFNONE-NEXT: [[TMP3:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ] -; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: store i64 [[TMP3]], i64* [[ARRAYIDX1]], align 8 +; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFNONE-NEXT: store i64 [[TMP3]], ptr [[ARRAYIDX1]], align 8 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -251,8 +247,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 { ; TFALWAYS-NEXT: br label [[FOR_BODY:%.*]] ; TFALWAYS: for.body: ; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFALWAYS-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; TFALWAYS-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50 ; TFALWAYS-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; TFALWAYS: if.then: @@ -263,8 +259,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 { ; TFALWAYS-NEXT: br label [[IF_END]] ; TFALWAYS: if.end: ; TFALWAYS-NEXT: [[TMP3:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ] -; TFALWAYS-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFALWAYS-NEXT: store i64 [[TMP3]], i64* [[ARRAYIDX1]], align 8 +; TFALWAYS-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: store i64 [[TMP3]], ptr [[ARRAYIDX1]], align 8 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -276,8 +272,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]] ; TFFALLBACK: for.body: ; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; TFFALLBACK-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; TFFALLBACK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50 ; TFFALLBACK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; TFFALLBACK: if.then: @@ -288,8 +284,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[IF_END]] ; TFFALLBACK: if.end: ; TFFALLBACK-NEXT: [[TMP3:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ] -; TFFALLBACK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: store i64 [[TMP3]], i64* [[ARRAYIDX1]], align 8 +; TFFALLBACK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: store i64 [[TMP3]], ptr [[ARRAYIDX1]], align 8 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -301,8 +297,8 @@ entry: for.body: %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + %0 = load i64, ptr %arrayidx, align 8 %cmp = icmp ugt i64 %0, 50 br i1 %cmp, label %if.then, label %if.else @@ -316,8 +312,8 @@ if.else: if.end: %3 = phi i64 [%1, %if.then], [%2, %if.else] - %arrayidx1 = getelementptr inbounds i64, i64* %b, i64 %indvars.iv - store i64 %3, i64* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %indvars.iv + store i64 %3, ptr %arrayidx1, align 8 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1024 br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -329,7 +325,7 @@ for.cond.cleanup: ; A call whose argument must be widened, where the vector variant does not have ; a mask. Forcing tail folding results in no vectorized call, whereas an ; unpredicated body with scalar tail can use the unmasked variant. 
-define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
+define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-LABEL: @test_widen_nomask(
; TFNONE-NEXT: entry:
; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -344,13 +340,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]]
; TFNONE: vector.body:
; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; TFNONE-NEXT: [[TMP4:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <vscale x 2 x i64>*
-; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP5]], align 4
+; TFNONE-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP4]], align 4
; TFNONE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
-; TFNONE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <vscale x 2 x i64>*
-; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64>* [[TMP8]], align 4
+; TFNONE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], align 4
; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; TFNONE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
@@ -364,11 +358,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
; TFNONE-NEXT: br label [[FOR_BODY:%.*]]
; TFNONE: for.body:
; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR4:[0-9]+]]
-; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -380,11 +374,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
; TFALWAYS-NEXT: br label [[FOR_BODY:%.*]]
; TFALWAYS: for.body:
; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
; TFALWAYS-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR3:[0-9]+]]
-; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -405,13 +399,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]]
; TFFALLBACK: vector.body:
; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; TFFALLBACK-NEXT: [[TMP4:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]]
-; TFFALLBACK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <vscale x 2 x i64>*
-; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP5]], align 4
+; TFFALLBACK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
+; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP4]], align 4
; TFFALLBACK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
-; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; TFFALLBACK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <vscale x 2 x i64>*
-; TFFALLBACK-NEXT: store <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64>* [[TMP8]], align 4
+; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; TFFALLBACK-NEXT: store <vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], align 4
; TFFALLBACK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; TFFALLBACK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2
; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
@@ -425,11 +417,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]]
; TFFALLBACK: for.body:
; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR4:[0-9]+]]
-; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -441,11 +433,11 @@ entry:
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %gep = getelementptr i64, i64* %b, i64 %indvars.iv
- %load = load i64, i64* %gep
+ %gep = getelementptr i64, ptr %b, i64 %indvars.iv
+ %load = load i64, ptr %gep
%call = call i64 @foo(i64 %load) #2
- %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv
- store i64 %call, i64* %arrayidx
+ %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %call, ptr %arrayidx
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -457,7 +449,7 @@ for.cond.cleanup:
}
; If both masked and unmasked options are present, we expect to see tail folding
; use the masked version and unpredicated body with scalar tail use the unmasked
; version.
-define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
+define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-LABEL: @test_widen_optmask(
; TFNONE-NEXT: entry:
; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -472,13 +464,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]]
; TFNONE: vector.body:
; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; TFNONE-NEXT: [[TMP4:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <vscale x 2 x i64>*
-; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP5]], align 4
+; TFNONE-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP4]], align 4
; TFNONE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
-; TFNONE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <vscale x 2 x i64>*
-; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64>* [[TMP8]], align 4
+; TFNONE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], align 4
; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; TFNONE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
@@ -492,11 +482,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
; TFNONE-NEXT: br label [[FOR_BODY:%.*]]
; TFNONE: for.body:
; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]]
-; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -508,11 +498,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
; TFALWAYS-NEXT: br label [[FOR_BODY:%.*]]
; TFALWAYS: for.body:
; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
; TFALWAYS-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR4:[0-9]+]]
-; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64
[[INDVARS_IV]] -; TFALWAYS-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4 +; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -533,13 +523,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]] ; TFFALLBACK: vector.body: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[TMP4:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to * -; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 4 +; TFFALLBACK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 ; TFFALLBACK-NEXT: [[TMP6:%.*]] = call @foo_vector_nomask( [[WIDE_LOAD]]) -; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to * -; TFFALLBACK-NEXT: store [[TMP6]], * [[TMP8]], align 4 +; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; TFFALLBACK-NEXT: store [[TMP6]], ptr [[TMP7]], align 4 ; TFFALLBACK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; TFFALLBACK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] @@ -553,11 +541,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]] ; TFFALLBACK: for.body: ; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4 +; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4 ; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]] -; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4 +; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -569,11 +557,11 @@ entry: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gep = getelementptr i64, i64* %b, i64 %indvars.iv - %load = load i64, i64* %gep + %gep = getelementptr i64, ptr %b, i64 %indvars.iv + %load = load i64, ptr %gep %call = call i64 @foo(i64 %load) #3 - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv - store i64 %call, i64* %arrayidx + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + store i64 %call, ptr %arrayidx %indvars.iv.next = add nuw nsw i64 
%indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1024 br i1 %exitcond, label %for.cond.cleanup, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll index 04c0f2d..6f6ce72 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll @@ -5,12 +5,12 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-COST: Checking a loop in 'fixed_width' -; CHECK-COST: Found an estimated cost of 11 for VF 2 For instruction: store i32 2, i32* %arrayidx1, align 4 -; CHECK-COST: Found an estimated cost of 25 for VF 4 For instruction: store i32 2, i32* %arrayidx1, align 4 +; CHECK-COST: Found an estimated cost of 11 for VF 2 For instruction: store i32 2, ptr %arrayidx1, align 4 +; CHECK-COST: Found an estimated cost of 25 for VF 4 For instruction: store i32 2, ptr %arrayidx1, align 4 ; CHECK-COST: Selecting VF: 1. ; We should decide this loop is not worth vectorising using fixed width vectors -define void @fixed_width(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) #0 { +define void @fixed_width(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 { ; CHECK-LABEL: @fixed_width( ; CHECK-NOT: vector.body entry: @@ -28,14 +28,14 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo for.body: ; preds = %for.body.preheader, %for.inc %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.07 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.07 + %0 = load i32, ptr %arrayidx, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body - %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %i.07 - store i32 2, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %i.07 + store i32 2, ptr %arrayidx1, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -46,12 +46,12 @@ for.inc: ; preds = %for.body, %if.then ; CHECK-COST: Checking a loop in 'scalable' -; CHECK-COST: Found an estimated cost of 1 for VF vscale x 4 For instruction: store i32 2, i32* %arrayidx1, align 4 +; CHECK-COST: Found an estimated cost of 1 for VF vscale x 4 For instruction: store i32 2, ptr %arrayidx1, align 4 -define void @scalable(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) #0 { +define void @scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 { ; CHECK-LABEL: @scalable( ; CHECK: vector.body -; CHECK: call void @llvm.masked.store.nxv4i32.p0nxv4i32 +; CHECK: call void @llvm.masked.store.nxv4i32.p0 entry: %cmp6 = icmp sgt i64 %n, 0 br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup @@ -67,14 +67,14 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo for.body: ; preds = %for.body.preheader, %for.inc %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.07 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.07 + %0 = load i32, ptr %arrayidx, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body - %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %i.07 - store i32 2, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %i.07 + store i32 2, ptr 
%arrayidx1, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll b/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll index 85f9e6d..3550d40 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll @@ -32,21 +32,21 @@ target triple = "aarch64--linux-gnu" ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> -define void @max_vf(%struct.pair* noalias nocapture %p) { +define void @max_vf(ptr noalias nocapture %p) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %0 = add nuw nsw i64 %i, 2 - %p_i.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 0 - %p_i_plus_2.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 0 - %1 = load i32, i32* %p_i.x, align 4 - store i32 %1, i32* %p_i_plus_2.x, align 4 - %p_i.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 1 - %p_i_plus_2.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 1 - %2 = load i32, i32* %p_i.y, align 4 - store i32 %2, i32* %p_i_plus_2.y, align 4 + %p_i.x = getelementptr inbounds %struct.pair, ptr %p, i64 %i, i32 0 + %p_i_plus_2.x = getelementptr inbounds %struct.pair, ptr %p, i64 %0, i32 0 + %1 = load i32, ptr %p_i.x, align 4 + store i32 %1, ptr %p_i_plus_2.x, align 4 + %p_i.y = getelementptr inbounds %struct.pair, ptr %p, i64 %i, i32 1 + %p_i_plus_2.y = getelementptr inbounds %struct.pair, ptr %p, i64 %0, i32 1 + %2 = load i32, ptr %p_i.y, align 4 + store i32 %2, ptr %p_i_plus_2.y, align 4 %i.next = add nuw nsw i64 %i, 1 %cond = icmp eq i64 %i.next, 1000 br i1 %cond, label %for.exit, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll index 03cb59b..785241d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll @@ -9,14 +9,14 @@ target triple = "aarch64--linux-gnu" ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2 ; CHECK: LV: Not considering vector loop of width 2 because it will not generate any vector instructions ; -define void @all_scalar(i64* %a, i64 %n) { +define void @all_scalar(ptr %a, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr i64, i64* %a, i64 %i - store i64 0, i64* %tmp0, align 1 + %tmp0 = getelementptr i64, ptr %a, i64 %i + store i64 0, ptr %tmp0, align 1 %i.next = add nuw nsw i64 %i, 2 %cond = icmp eq i64 %i.next, %n br i1 %cond, label %for.end, label %for.body @@ -30,15 +30,15 @@ for.end: ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %i.next = zext i32 %j.next to i64 ; CHECK: LV: Not considering vector loop of width 8 because it will not generate any vector instructions %struct.a = type { i32, i8 } -define void @PR33193(%struct.a* %a, i64 %n) { +define void @PR33193(ptr %a, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %j = phi i32 [ 0, %entry ], [ %j.next, %for.body ] - %tmp0 = getelementptr inbounds %struct.a, %struct.a* %a, i64 %i, i32 1 - store i8 0, i8* %tmp0, align 4 + %tmp0 = getelementptr inbounds %struct.a, ptr %a, i64 %i, i32 1 + store i8 0, ptr %tmp0, align 4 %j.next = add i32 %j, 
1 %i.next = zext i32 %j.next to i64 %cond = icmp ugt i64 %n, %i.next diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll index f6228fc..80410a4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll @@ -24,17 +24,17 @@ ; CHECK-LABEL: vector.body: ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] -; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, <4 x i64> %[[VecInd]] +; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]] ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[VecIndTr]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> ) +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> ) ; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> ; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]] ; CHECK: br label %[[InnerLoop:.+]] ; CHECK: [[InnerLoop]]: ; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] -; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[StoreVal]], <4 x i32*> %[[AAddr2]], i32 4, <4 x i1> %[[InnerPhi]], <4 x i64> %[[VecInd]] +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> %[[InnerPhi]], ; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], ; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0 @@ -59,17 +59,17 @@ entry: for.body: ; preds = %for.inc8, %entry %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ] - %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, i64 %indvars.iv21 + %arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i64 %indvars.iv21 %0 = trunc i64 %indvars.iv21 to i32 - store i32 %0, i32* %arrayidx, align 4 + store i32 %0, ptr %arrayidx, align 4 %1 = trunc i64 %indvars.iv21 to i32 %add = add nsw i32 %1, %n br label %for.body3 for.body3: ; preds = %for.body3, %for.body %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ] - %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21 - store i32 %add, i32* %arrayidx7, align 4 + %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21 + store i32 %add, ptr %arrayidx7, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 8 br i1 %exitcond, label %for.inc8, label %for.body3 @@ -91,15 +91,15 @@ for.end10: ; preds = %for.inc8 ; CHECK-LABEL: vector.body: ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] ; CHECK: %[[VecInd:.*]] = phi <2 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] -; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i64], [8 x i64]* @arrX, i64 0, <2 x i64> %[[VecInd]] -; 
CHECK: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> %[[VecInd]], <2 x i64*> %[[AAddr]], i32 4, <2 x i1> ) +; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i64], ptr @arrX, i64 0, <2 x i64> %[[VecInd]] +; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[VecInd]], <2 x ptr> %[[AAddr]], i32 4, <2 x i1> ) ; CHECK: %[[StoreVal:.*]] = add nsw <2 x i64> %[[VecInd]], %[[Splat]] ; CHECK: br label %[[InnerLoop:.+]] ; CHECK: [[InnerLoop]]: ; CHECK: %[[InnerPhi:.*]] = phi <2 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] -; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* @arrY, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> %[[StoreVal]], <2 x i64*> %[[AAddr2]], i32 4, <2 x i1> +; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i64]], ptr @arrY, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]] +; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[StoreVal]], <2 x ptr> %[[AAddr2]], i32 4, <2 x i1> ; CHECK: %[[InnerPhiNext]] = add nuw nsw <2 x i64> %[[InnerPhi]], ; CHECK: %[[VecCond:.*]] = icmp eq <2 x i64> %[[InnerPhiNext]], ; CHECK: %[[InnerCond:.*]] = extractelement <2 x i1> %[[VecCond]], i32 0 @@ -117,15 +117,15 @@ entry: for.body: ; preds = %for.inc8, %entry %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ] - %arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* @arrX, i64 0, i64 %indvars.iv21 - store i64 %indvars.iv21, i64* %arrayidx, align 4 + %arrayidx = getelementptr inbounds [8 x i64], ptr @arrX, i64 0, i64 %indvars.iv21 + store i64 %indvars.iv21, ptr %arrayidx, align 4 %add = add nsw i64 %indvars.iv21, %n br label %for.body3 for.body3: ; preds = %for.body3, %for.body %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ] - %arrayidx7 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* @arrY, i64 0, i64 %indvars.iv, i64 %indvars.iv21 - store i64 %add, i64* %arrayidx7, align 4 + %arrayidx7 = getelementptr inbounds [8 x [8 x i64]], ptr @arrY, i64 0, i64 %indvars.iv, i64 %indvars.iv21 + store i64 %add, ptr %arrayidx7, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 8 br i1 %exitcond, label %for.inc8, label %for.body3 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll index f86540a..43d0e8f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll @@ -7,32 +7,32 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128-p:16:16-p4:32:16" ; Check that all the loads are scalarized -; CHECK: load i16, i16* -; CHECK: load i16, i16* -; CHECK: load i16, i16 addrspace(4)* -; CHECK: load i16, i16 addrspace(4)* +; CHECK: load i16, ptr +; CHECK: load i16, ptr +; CHECK: load i16, ptr addrspace(4) +; CHECK: load i16, ptr addrspace(4) ; CHECK: store <2 x i16> %rec1445 = type { i16, i16, i16, i16, i16 } -define void @foo(%rec1445* %a, %rec1445 addrspace(4)* %b, i16* noalias %dst) { +define void @foo(ptr %a, ptr addrspace(4) %b, ptr noalias %dst) { bb1: br label %bb4 bb4: %tmp1 = phi i16 [ 0, %bb1 ], [ %_tmp1013, %bb4 ] - %tmp2 = phi %rec1445* [ %a, %bb1 ], [ %_tmp1015, %bb4 ] - %tmp3 = phi %rec1445 addrspace(4)* [ %b, %bb1 ], [ %_tmp1017, %bb4 ] - %0 = getelementptr %rec1445, %rec1445* %tmp2, i16 0, i32 1 - %_tmp987 = load i16, i16* %0, align 1 - %1 = getelementptr %rec1445, 
%rec1445 addrspace(4)* %tmp3, i32 0, i32 1 - %_tmp993 = load i16, i16 addrspace(4)* %1, align 1 + %tmp2 = phi ptr [ %a, %bb1 ], [ %_tmp1015, %bb4 ] + %tmp3 = phi ptr addrspace(4) [ %b, %bb1 ], [ %_tmp1017, %bb4 ] + %0 = getelementptr %rec1445, ptr %tmp2, i16 0, i32 1 + %_tmp987 = load i16, ptr %0, align 1 + %1 = getelementptr %rec1445, ptr addrspace(4) %tmp3, i32 0, i32 1 + %_tmp993 = load i16, ptr addrspace(4) %1, align 1 %add = add i16 %_tmp987, %_tmp993 - %dst.gep = getelementptr inbounds i16, i16* %dst, i16 %tmp1 - store i16 %add, i16* %dst.gep + %dst.gep = getelementptr inbounds i16, ptr %dst, i16 %tmp1 + store i16 %add, ptr %dst.gep %_tmp1013 = add i16 %tmp1, 1 - %_tmp1015 = getelementptr %rec1445, %rec1445* %tmp2, i16 1 - %_tmp1017 = getelementptr %rec1445, %rec1445 addrspace(4)* %tmp3, i32 1 + %_tmp1015 = getelementptr %rec1445, ptr %tmp2, i16 1 + %_tmp1017 = getelementptr %rec1445, ptr addrspace(4) %tmp3, i32 1 %_tmp1019 = icmp ult i16 %_tmp1013, 24 br i1 %_tmp1019, label %bb4, label %bb16 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll index bf93ec8..20b5364 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll @@ -3,19 +3,19 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" @b = common local_unnamed_addr global i32 0, align 4 -@a = common local_unnamed_addr global i16* null, align 8 +@a = common local_unnamed_addr global ptr null, align 8 define i32 @fn1() local_unnamed_addr #0 { ; We expect the backend to expand all reductions. ; CHECK: @llvm.vector.reduce entry: - %0 = load i32, i32* @b, align 4, !tbaa !1 + %0 = load i32, ptr @b, align 4, !tbaa !1 %cmp40 = icmp sgt i32 %0, 0 br i1 %cmp40, label %for.body.lr.ph, label %for.end for.body.lr.ph: ; preds = %entry - %1 = load i16*, i16** @a, align 8, !tbaa !5 - %2 = load i32, i32* @b, align 4, !tbaa !1 + %1 = load ptr, ptr @a, align 8, !tbaa !5 + %2 = load i32, ptr @b, align 4, !tbaa !1 %3 = sext i32 %2 to i64 br label %for.body @@ -23,8 +23,8 @@ for.body: ; preds = %for.body.lr.ph, %fo %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] %d.043 = phi i16 [ undef, %for.body.lr.ph ], [ %.sink28, %for.body ] %c.042 = phi i16 [ undef, %for.body.lr.ph ], [ %c.0., %for.body ] - %arrayidx = getelementptr inbounds i16, i16* %1, i64 %indvars.iv - %4 = load i16, i16* %arrayidx, align 2, !tbaa !7 + %arrayidx = getelementptr inbounds i16, ptr %1, i64 %indvars.iv + %4 = load i16, ptr %arrayidx, align 2, !tbaa !7 %cmp2 = icmp sgt i16 %c.042, %4 %c.0. 
= select i1 %cmp2, i16 %c.042, i16 %4 %cmp13 = icmp slt i16 %d.043, %4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll index e4e415a..44820e0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll @@ -14,7 +14,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @_Z1dv() local_unnamed_addr #0 { ; CHECK-LABEL: @_Z1dv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @"_ZN3$_01aEv"(%struct.anon* nonnull @b) +; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @"_ZN3$_01aEv"(ptr nonnull @b) ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[F_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD5:%.*]], [[FOR_COND_CLEANUP:%.*]] ] @@ -36,16 +36,16 @@ define void @_Z1dv() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CONV]], [[TMP1]] ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[ADD]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i8], [6 x i8]* @c, i64 0, i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i8 [[TMP2]], i8* [[ARRAYIDX3]], align 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i8], ptr @c, i64 0, i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[CALL]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i8 [[TMP2]], ptr [[ARRAYIDX3]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; entry: - %call = tail call i8* @"_ZN3$_01aEv"(%struct.anon* nonnull @b) #2 + %call = tail call ptr @"_ZN3$_01aEv"(ptr nonnull @b) #2 br label %for.cond for.cond: ; preds = %for.cond.cleanup, %entry @@ -72,13 +72,13 @@ for.body: ; preds = %for.body, %for.body %1 = trunc i64 %indvars.iv to i32 %add = add i32 %conv, %1 %idxprom = zext i32 %add to i64 - %arrayidx = getelementptr inbounds [6 x i8], [6 x i8]* @c, i64 0, i64 %idxprom - %2 = load i8, i8* %arrayidx, align 1 - %arrayidx3 = getelementptr inbounds i8, i8* %call, i64 %indvars.iv - store i8 %2, i8* %arrayidx3, align 1 + %arrayidx = getelementptr inbounds [6 x i8], ptr @c, i64 0, i64 %idxprom + %2 = load i8, ptr %arrayidx, align 1 + %arrayidx3 = getelementptr inbounds i8, ptr %call, i64 %indvars.iv + store i8 %2, ptr %arrayidx3, align 1 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 4 br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body } -declare i8* @"_ZN3$_01aEv"(%struct.anon*) local_unnamed_addr #1 +declare ptr @"_ZN3$_01aEv"(ptr) local_unnamed_addr #1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll index 2a974e5..ae5e7d0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll @@ -3,9 +3,9 @@ ; CHECK-LABEL: define void @test( ; CHECK: vector.body -define void @test(i64* %dst, i32* %src) { +define void @test(ptr %dst, ptr 
%src) { entry: - %l = load i32, i32* %src + %l = load i32, ptr %src br label %loop.ph loop.ph: @@ -14,8 +14,8 @@ loop.ph: loop: %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop ] %l.cast = sext i32 %l to i64 - %dst.idx = getelementptr i64, i64* %dst, i64 %iv - store i64 %l.cast, i64* %dst.idx + %dst.idx = getelementptr i64, ptr %dst, i64 %iv + store i64 %l.cast, ptr %dst.idx %iv.next = add nuw nsw i64 %iv, 1 %cmp9.us = icmp ult i64 %iv.next, 20 br i1 %cmp9.us, label %loop, label %exit diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll index 32c0076..ff544df 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll @@ -21,17 +21,17 @@ target triple = "aarch64--linux-gnu" ; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3 ; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3 ; -define i32 @predicated_udiv(i32* %a, i32* %b, i1 %c, i64 %n) { +define i32 @predicated_udiv(ptr %a, ptr %b, i1 %c, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] %r = phi i32 [ 0, %entry ], [ %tmp6, %for.inc ] - %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i - %tmp1 = getelementptr inbounds i32, i32* %b, i64 %i - %tmp2 = load i32, i32* %tmp0, align 4 - %tmp3 = load i32, i32* %tmp1, align 4 + %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i + %tmp1 = getelementptr inbounds i32, ptr %b, i64 %i + %tmp2 = load i32, ptr %tmp0, align 4 + %tmp3 = load i32, ptr %tmp1, align 4 br i1 %c, label %if.then, label %for.inc if.then: @@ -59,22 +59,22 @@ for.end: ; Cost of store: ; (store(4) + extractelement(3)) / 2 = 3 ; -; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4 -; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4 +; CHECK: Scalarizing and predicating: store i32 %tmp2, ptr %tmp0, align 4 +; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4 ; -define void @predicated_store(i32* %a, i1 %c, i32 %x, i64 %n) { +define void @predicated_store(ptr %a, i1 %c, i32 %x, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] - %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i - %tmp1 = load i32, i32* %tmp0, align 4 + %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i + %tmp1 = load i32, ptr %tmp0, align 4 %tmp2 = add nsw i32 %tmp1, %x br i1 %c, label %if.then, label %for.inc if.then: - store i32 %tmp2, i32* %tmp0, align 4 + store i32 %tmp2, ptr %tmp0, align 4 br label %for.inc for.inc: @@ -90,31 +90,31 @@ for.end: ; ; Same as predicate_store except we use a pointer PHI to maintain the address ; -; CHECK: Found scalar instruction: %addr = phi i32* [ %a, %entry ], [ %addr.next, %for.inc ] -; CHECK: Found scalar instruction: %addr.next = getelementptr inbounds i32, i32* %addr, i64 1 -; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %addr, align 4 -; CHECK: Found an estimated cost of 0 for VF 2 For instruction: %addr = phi i32* [ %a, %entry ], [ %addr.next, %for.inc ] -; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, i32* %addr, align 4 +; CHECK: Found scalar instruction: %addr = phi ptr [ %a, %entry ], [ %addr.next, %for.inc ] +; CHECK: Found scalar instruction: %addr.next = getelementptr inbounds i32, ptr %addr, i64 1 +; CHECK: Scalarizing and 
predicating: store i32 %tmp2, ptr %addr, align 4 +; CHECK: Found an estimated cost of 0 for VF 2 For instruction: %addr = phi ptr [ %a, %entry ], [ %addr.next, %for.inc ] +; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, ptr %addr, align 4 ; -define void @predicated_store_phi(i32* %a, i1 %c, i32 %x, i64 %n) { +define void @predicated_store_phi(ptr %a, i1 %c, i32 %x, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] - %addr = phi i32 * [ %a, %entry ], [ %addr.next, %for.inc ] - %tmp1 = load i32, i32* %addr, align 4 + %addr = phi ptr [ %a, %entry ], [ %addr.next, %for.inc ] + %tmp1 = load i32, ptr %addr, align 4 %tmp2 = add nsw i32 %tmp1, %x br i1 %c, label %if.then, label %for.inc if.then: - store i32 %tmp2, i32* %addr, align 4 + store i32 %tmp2, ptr %addr, align 4 br label %for.inc for.inc: %i.next = add nuw nsw i64 %i, 1 %cond = icmp slt i64 %i.next, %n - %addr.next = getelementptr inbounds i32, i32* %addr, i64 1 + %addr.next = getelementptr inbounds i32, ptr %addr, i64 1 br i1 %cond, label %for.body, label %for.end for.end: @@ -138,15 +138,15 @@ for.end: ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp3 = add nsw i32 %tmp2, %x ; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3 ; -define i32 @predicated_udiv_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) { +define i32 @predicated_udiv_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] %r = phi i32 [ 0, %entry ], [ %tmp6, %for.inc ] - %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i - %tmp2 = load i32, i32* %tmp0, align 4 + %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i + %tmp2 = load i32, ptr %tmp0, align 4 br i1 %c, label %if.then, label %for.inc if.then: @@ -179,23 +179,23 @@ for.end: ; store(4) / 2 = 2 ; ; CHECK: Scalarizing: %tmp2 = add nsw i32 %tmp1, %x -; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4 +; CHECK: Scalarizing and predicating: store i32 %tmp2, ptr %tmp0, align 4 ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = add nsw i32 %tmp1, %x -; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4 +; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4 ; -define void @predicated_store_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) { +define void @predicated_store_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] - %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i - %tmp1 = load i32, i32* %tmp0, align 4 + %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i + %tmp1 = load i32, ptr %tmp0, align 4 br i1 %c, label %if.then, label %for.inc if.then: %tmp2 = add nsw i32 %tmp1, %x - store i32 %tmp2, i32* %tmp0, align 4 + store i32 %tmp2, ptr %tmp0, align 4 br label %for.inc for.inc: @@ -231,21 +231,21 @@ for.end: ; CHECK: Scalarizing and predicating: %tmp3 = sdiv i32 %tmp1, %tmp2 ; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp3, %tmp2 ; CHECK: Scalarizing: %tmp5 = sub i32 %tmp4, %x -; CHECK: Scalarizing and predicating: store i32 %tmp5, i32* %tmp0, align 4 +; CHECK: Scalarizing and predicating: store i32 %tmp5, ptr %tmp0, align 4 ; CHECK: Found an estimated cost of 1 for VF 2 For instruction: %tmp2 = add i32 %tmp1, %x ; CHECK: Found an estimated cost of 5 for VF 2 
For instruction: %tmp3 = sdiv i32 %tmp1, %tmp2 ; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp3, %tmp2 ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp5 = sub i32 %tmp4, %x -; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp5, i32* %tmp0, align 4 +; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp5, ptr %tmp0, align 4 ; -define void @predication_multi_context(i32* %a, i1 %c, i32 %x, i64 %n) { +define void @predication_multi_context(ptr %a, i1 %c, i32 %x, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] - %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i - %tmp1 = load i32, i32* %tmp0, align 4 + %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i + %tmp1 = load i32, ptr %tmp0, align 4 br i1 %c, label %if.then, label %for.inc if.then: @@ -253,7 +253,7 @@ if.then: %tmp3 = sdiv i32 %tmp1, %tmp2 %tmp4 = udiv i32 %tmp3, %tmp2 %tmp5 = sub i32 %tmp4, %x - store i32 %tmp5, i32* %tmp0, align 4 + store i32 %tmp5, ptr %tmp0, align 4 br label %for.inc for.inc: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll index fea9172..a60577b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll @@ -23,7 +23,7 @@ target triple = "aarch64--linux-gnu" ; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> ; CHECK: zext i8 [[Rdx]] to i32 ; -define i8 @reduction_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) { +define i8 @reduction_i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) { entry: %cmp.12 = icmp sgt i32 %n, 0 br i1 %cmp.12, label %for.body.preheader, label %for.cond.cleanup @@ -43,11 +43,11 @@ for.cond.cleanup: for.body: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] %sum.013 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv - %0 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv + %0 = load i8, ptr %arrayidx, align 1 %conv = zext i8 %0 to i32 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv - %1 = load i8, i8* %arrayidx2, align 1 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv + %1 = load i8, ptr %arrayidx2, align 1 %conv3 = zext i8 %1 to i32 %conv4 = and i32 %sum.013, 255 %add = add nuw nsw i32 %conv, %conv4 @@ -78,7 +78,7 @@ for.body: ; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> ; CHECK: zext i16 [[Rdx]] to i32 ; -define i16 @reduction_i16_1(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %n) { +define i16 @reduction_i16_1(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) { entry: %cmp.16 = icmp sgt i32 %n, 0 br i1 %cmp.16, label %for.body.preheader, label %for.cond.cleanup @@ -98,11 +98,11 @@ for.cond.cleanup: for.body: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] %sum.017 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %a, i64 %indvars.iv - %0 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %a, i64 %indvars.iv + %0 = load i16, ptr %arrayidx, align 2 %conv.14 = zext i16 %0 to i32 - %arrayidx2 = getelementptr inbounds 
i16, i16* %b, i64 %indvars.iv - %1 = load i16, i16* %arrayidx2, align 2 + %arrayidx2 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv + %1 = load i16, ptr %arrayidx2, align 2 %conv3.15 = zext i16 %1 to i32 %conv4.13 = and i32 %sum.017, 65535 %add = add nuw nsw i32 %conv.14, %conv4.13 @@ -135,7 +135,7 @@ for.body: ; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> ; CHECK: zext i16 [[Rdx]] to i32 ; -define i16 @reduction_i16_2(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) { +define i16 @reduction_i16_2(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) { entry: %cmp.14 = icmp sgt i32 %n, 0 br i1 %cmp.14, label %for.body.preheader, label %for.cond.cleanup @@ -155,11 +155,11 @@ for.cond.cleanup: for.body: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] %sum.015 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv - %0 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv + %0 = load i8, ptr %arrayidx, align 1 %conv = zext i8 %0 to i32 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv - %1 = load i8, i8* %arrayidx2, align 1 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv + %1 = load i8, ptr %arrayidx2, align 1 %conv3 = zext i8 %1 to i32 %conv4.13 = and i32 %sum.015, 65535 %add = add nuw nsw i32 %conv, %conv4.13 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll index 71b7754..4dd6522 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll @@ -10,23 +10,23 @@ ; CHECK-LABEL: define {{.*}} @test_tc_too_small ; CHECK-NOT: vector.memcheck ; CHECK-NOT: vector.body -define void @test_tc_too_small(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2) { +define void @test_tc_too_small(ptr %ptr.1, ptr %ptr.2, ptr %ptr.3, ptr %ptr.4, i64 %off.1, i64 %off.2) { entry: br label %loop loop: ; preds = %bb54, %bb37 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %gep.1 = getelementptr inbounds i16, i16* %ptr.1, i64 %iv - %lv.1 = load i16, i16* %gep.1, align 2 + %gep.1 = getelementptr inbounds i16, ptr %ptr.1, i64 %iv + %lv.1 = load i16, ptr %gep.1, align 2 %ext.1 = sext i16 %lv.1 to i32 - %gep.2 = getelementptr inbounds i16, i16* %ptr.2, i64 %iv - %lv.2 = load i16, i16* %gep.2, align 2 + %gep.2 = getelementptr inbounds i16, ptr %ptr.2, i64 %iv + %lv.2 = load i16, ptr %gep.2, align 2 %ext.2 = sext i16 %lv.2 to i32 - %gep.off.1 = getelementptr inbounds i16, i16* %gep.2, i64 %off.1 - %lv.3 = load i16, i16* %gep.off.1, align 2 + %gep.off.1 = getelementptr inbounds i16, ptr %gep.2, i64 %off.1 + %lv.3 = load i16, ptr %gep.off.1, align 2 %ext.3 = sext i16 %lv.3 to i32 - %gep.off.2 = getelementptr inbounds i16, i16* %gep.2, i64 %off.2 - %lv.4 = load i16, i16* %gep.off.2, align 2 + %gep.off.2 = getelementptr inbounds i16, ptr %gep.2, i64 %off.2 + %lv.4 = load i16, ptr %gep.off.2, align 2 %ext.4 = sext i16 %lv.4 to i32 %tmp62 = mul nsw i32 %ext.2, 11 %tmp66 = mul nsw i32 %ext.3, -4 @@ -45,11 +45,11 @@ loop: ; preds = %bb54, %bb37 %tmp82 = sub nsw i32 %tmp81, %ext.1 %tmp83 = lshr i32 %tmp82, 1 %trunc.1 = trunc i32 %tmp75 to i16 - %gep.3 = getelementptr inbounds i16, i16* %ptr.3, i64 %iv - store i16 
%trunc.1, i16* %gep.3, align 2 + %gep.3 = getelementptr inbounds i16, ptr %ptr.3, i64 %iv + store i16 %trunc.1, ptr %gep.3, align 2 %trunc.2 = trunc i32 %tmp83 to i16 - %gep.4 = getelementptr inbounds i16, i16* %ptr.4, i64 %iv - store i16 %trunc.2, i16* %gep.4, align 2 + %gep.4 = getelementptr inbounds i16, ptr %ptr.4, i64 %iv + store i16 %trunc.2, ptr %gep.4, align 2 %iv.next = add nuw nsw i64 %iv, 1 %cmp = icmp ult i64 %iv, 10 br i1 %cmp, label %loop, label %exit @@ -65,23 +65,23 @@ exit: ; THRESHOLD-NOT: vector.memcheck ; THRESHOLD-NOT: vector.body ; -define void @test_tc_big_enough(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2) { +define void @test_tc_big_enough(ptr %ptr.1, ptr %ptr.2, ptr %ptr.3, ptr %ptr.4, i64 %off.1, i64 %off.2) { entry: br label %loop loop: ; preds = %bb54, %bb37 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %gep.1 = getelementptr inbounds i16, i16* %ptr.1, i64 %iv - %lv.1 = load i16, i16* %gep.1, align 2 + %gep.1 = getelementptr inbounds i16, ptr %ptr.1, i64 %iv + %lv.1 = load i16, ptr %gep.1, align 2 %ext.1 = sext i16 %lv.1 to i32 - %gep.2 = getelementptr inbounds i16, i16* %ptr.2, i64 %iv - %lv.2 = load i16, i16* %gep.2, align 2 + %gep.2 = getelementptr inbounds i16, ptr %ptr.2, i64 %iv + %lv.2 = load i16, ptr %gep.2, align 2 %ext.2 = sext i16 %lv.2 to i32 - %gep.off.1 = getelementptr inbounds i16, i16* %gep.2, i64 %off.1 - %lv.3 = load i16, i16* %gep.off.1, align 2 + %gep.off.1 = getelementptr inbounds i16, ptr %gep.2, i64 %off.1 + %lv.3 = load i16, ptr %gep.off.1, align 2 %ext.3 = sext i16 %lv.3 to i32 - %gep.off.2 = getelementptr inbounds i16, i16* %gep.2, i64 %off.2 - %lv.4 = load i16, i16* %gep.off.2, align 2 + %gep.off.2 = getelementptr inbounds i16, ptr %gep.2, i64 %off.2 + %lv.4 = load i16, ptr %gep.off.2, align 2 %ext.4 = sext i16 %lv.4 to i32 %tmp62 = mul nsw i32 %ext.2, 11 %tmp66 = mul nsw i32 %ext.3, -4 @@ -100,11 +100,11 @@ loop: ; preds = %bb54, %bb37 %tmp82 = sub nsw i32 %tmp81, %ext.1 %tmp83 = lshr i32 %tmp82, 1 %trunc.1 = trunc i32 %tmp75 to i16 - %gep.3 = getelementptr inbounds i16, i16* %ptr.3, i64 %iv - store i16 %trunc.1, i16* %gep.3, align 2 + %gep.3 = getelementptr inbounds i16, ptr %ptr.3, i64 %iv + store i16 %trunc.1, ptr %gep.3, align 2 %trunc.2 = trunc i32 %tmp83 to i16 - %gep.4 = getelementptr inbounds i16, i16* %ptr.4, i64 %iv - store i16 %trunc.2, i16* %gep.4, align 2 + %gep.4 = getelementptr inbounds i16, ptr %ptr.4, i64 %iv + store i16 %trunc.2, ptr %gep.4, align 2 %iv.next = add nuw nsw i64 %iv, 1 %cmp = icmp ult i64 %iv, 500 br i1 %cmp, label %loop, label %exit @@ -113,7 +113,7 @@ exit: ret void } -define void @test_tc_unknown(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2, i64 %N) { +define void @test_tc_unknown(ptr %ptr.1, ptr %ptr.2, ptr %ptr.3, ptr %ptr.4, i64 %off.1, i64 %off.2, i64 %N) { ; CHECK-LABEL: define void @test_tc_unknown ; DEFAULT: [[ADD:%.+]] = add i64 %N, 1 ; DEFAULT-NEXT: [[C:%.+]] = icmp ult i64 [[ADD]], 16 @@ -126,17 +126,17 @@ entry: loop: ; preds = %bb54, %bb37 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %gep.1 = getelementptr inbounds i16, i16* %ptr.1, i64 %iv - %lv.1 = load i16, i16* %gep.1, align 2 + %gep.1 = getelementptr inbounds i16, ptr %ptr.1, i64 %iv + %lv.1 = load i16, ptr %gep.1, align 2 %ext.1 = sext i16 %lv.1 to i32 - %gep.2 = getelementptr inbounds i16, i16* %ptr.2, i64 %iv - %lv.2 = load i16, i16* %gep.2, align 2 + %gep.2 = getelementptr inbounds i16, ptr %ptr.2, i64 %iv + %lv.2 = load i16, ptr %gep.2, align 2 %ext.2 = 
sext i16 %lv.2 to i32 - %gep.off.1 = getelementptr inbounds i16, i16* %gep.2, i64 %off.1 - %lv.3 = load i16, i16* %gep.off.1, align 2 + %gep.off.1 = getelementptr inbounds i16, ptr %gep.2, i64 %off.1 + %lv.3 = load i16, ptr %gep.off.1, align 2 %ext.3 = sext i16 %lv.3 to i32 - %gep.off.2 = getelementptr inbounds i16, i16* %gep.2, i64 %off.2 - %lv.4 = load i16, i16* %gep.off.2, align 2 + %gep.off.2 = getelementptr inbounds i16, ptr %gep.2, i64 %off.2 + %lv.4 = load i16, ptr %gep.off.2, align 2 %ext.4 = sext i16 %lv.4 to i32 %tmp62 = mul nsw i32 %ext.2, 11 %tmp66 = mul nsw i32 %ext.3, -4 @@ -155,11 +155,11 @@ loop: ; preds = %bb54, %bb37 %tmp82 = sub nsw i32 %tmp81, %ext.1 %tmp83 = lshr i32 %tmp82, 1 %trunc.1 = trunc i32 %tmp75 to i16 - %gep.3 = getelementptr inbounds i16, i16* %ptr.3, i64 %iv - store i16 %trunc.1, i16* %gep.3, align 2 + %gep.3 = getelementptr inbounds i16, ptr %ptr.3, i64 %iv + store i16 %trunc.1, ptr %gep.3, align 2 %trunc.2 = trunc i32 %tmp83 to i16 - %gep.4 = getelementptr inbounds i16, i16* %ptr.4, i64 %iv - store i16 %trunc.2, i16* %gep.4, align 2 + %gep.4 = getelementptr inbounds i16, ptr %ptr.4, i64 %iv + store i16 %trunc.2, ptr %gep.4, align 2 %iv.next = add nuw nsw i64 %iv, 1 %cmp = icmp ult i64 %iv, %N br i1 %cmp, label %loop, label %exit diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll index b001538..b66bb94 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll @@ -4,7 +4,7 @@ ; CHECK-REMARKS: UserVF ignored because of invalid costs. ; CHECK-REMARKS: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): alloca ; CHECK-REMARKS: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store -define void @alloca(i32** %vla, i64 %N) { +define void @alloca(ptr %vla, i64 %N) { ; CHECK-LABEL: @alloca( ; CHECK-NOT: %t | FileCheck %s ; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS -define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) { +define void @vec_load(i64 %N, ptr nocapture %a, ptr nocapture readonly %b) { ; CHECK-LABEL: @vec_load ; CHECK: vector.body: -; CHECK: %[[LOAD:.*]] = load , * +; CHECK: %[[LOAD:.*]] = load , ptr ; CHECK: call @foo_vec( %[[LOAD]]) entry: %cmp7 = icmp sgt i64 %N, 0 @@ -16,12 +16,12 @@ entry: for.body: ; preds = %for.body.preheader, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds double, double* %b, i64 %iv - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %b, i64 %iv + %0 = load double, ptr %arrayidx, align 8 %1 = call double @foo(double %0) #0 %add = fadd double %1, 1.000000e+00 - %arrayidx2 = getelementptr inbounds double, double* %a, i64 %iv - store double %add, double* %arrayidx2, align 8 + %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %iv + store double %add, ptr %arrayidx2, align 8 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1 @@ -30,7 +30,7 @@ for.end: ; preds = %for.body, %entry ret void } -define void @vec_scalar(i64 %N, double* nocapture %a) { +define void @vec_scalar(i64 %N, ptr nocapture %a) { ; CHECK-LABEL: @vec_scalar ; CHECK: vector.body: ; CHECK: call @foo_vec( shufflevector ( insertelement ( poison, double 1.000000e+01, i32 0), poison, zeroinitializer)) @@ 
-42,8 +42,8 @@ for.body: ; preds = %for.body.preheader, %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %0 = call double @foo(double 10.0) #0 %sub = fsub double %0, 1.000000e+00 - %arrayidx = getelementptr inbounds double, double* %a, i64 %iv - store double %sub, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv + store double %sub, ptr %arrayidx, align 8 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1 @@ -52,22 +52,22 @@ for.end: ; preds = %for.body, %entry ret void } -define void @vec_ptr(i64 %N, i64* noalias %a, i64** readnone %b) { +define void @vec_ptr(i64 %N, ptr noalias %a, ptr readnone %b) { ; CHECK-LABEL: @vec_ptr ; CHECK: vector.body: -; CHECK: %[[LOAD:.*]] = load , * -; CHECK: call @bar_vec( %[[LOAD]]) +; CHECK: %[[LOAD:.*]] = load , ptr +; CHECK: call @bar_vec( %[[LOAD]]) entry: %cmp7 = icmp sgt i64 %N, 0 br i1 %cmp7, label %for.body, label %for.end for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %gep = getelementptr i64*, i64** %b, i64 %iv - %load = load i64*, i64** %gep - %call = call i64 @bar(i64* %load) #1 - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv - store i64 %call, i64* %arrayidx + %gep = getelementptr ptr, ptr %b, i64 %iv + %load = load ptr, ptr %gep + %call = call i64 @bar(ptr %load) #1 + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv + store i64 %call, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, 1024 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 @@ -76,10 +76,10 @@ for.end: ret void } -define void @vec_intrinsic(i64 %N, double* nocapture readonly %a) { +define void @vec_intrinsic(i64 %N, ptr nocapture readonly %a) { ; CHECK-LABEL: @vec_intrinsic ; CHECK: vector.body: -; CHECK: %[[LOAD:.*]] = load , * +; CHECK: %[[LOAD:.*]] = load , ptr ; CHECK: call fast @sin_vec_nxv2f64( %[[LOAD]]) entry: %cmp7 = icmp sgt i64 %N, 0 @@ -87,11 +87,11 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds double, double* %a, i64 %iv - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv + %0 = load double, ptr %arrayidx, align 8 %1 = call fast double @llvm.sin.f64(double %0) #2 %add = fadd fast double %1, 1.000000e+00 - store double %add, double* %arrayidx, align 8 + store double %add, ptr %arrayidx, align 8 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %N br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 @@ -104,7 +104,7 @@ for.end: ; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load ; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32 ; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store -define void @vec_sin_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) { +define void @vec_sin_no_mapping(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %n) { ; CHECK: @vec_sin_no_mapping ; CHECK: call fast <2 x float> @llvm.sin.v2f32 ; CHECK-NOT: @llvm.sin.v2f32 ; CHECK-NOT: @llvm.sqrt.nxv2f32 entry: @@ -199,11 +199,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, 
float* %src, i64 %i.07 - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %src, i64 %i.07 + %0 = load float, ptr %arrayidx, align 4 %1 = tail call fast float @llvm.sqrt.f32(float %0) - %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07 - store float %1, float* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07 + store float %1, ptr %arrayidx1, align 4 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1 @@ -214,13 +214,13 @@ for.cond.cleanup: ; preds = %for.body declare double @foo(double) -declare i64 @bar(i64*) +declare i64 @bar(ptr) declare double @llvm.sin.f64(double) declare float @llvm.sin.f32(float) declare float @llvm.sqrt.f32(float) declare @foo_vec() -declare @bar_vec() +declare @bar_vec() declare @sin_vec_nxv2f64() declare <2 x double> @sin_vec_v2f64(<2 x double>) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll index 4a6b1fb..8123651 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll @@ -11,7 +11,7 @@ target triple = "aarch64-unknown-linux-gnu" ; a[i] /= b[i]; ; } -define void @predication_in_loop(i32* %a, i32* %b, i32* %cond) #0 { +define void @predication_in_loop(ptr %a, ptr %b, ptr %cond) #0 { ; CHECK-LABEL: @predication_in_loop ; CHECK: sdiv ; @@ -23,18 +23,18 @@ for.cond.cleanup: ; preds = %for.inc, %entry for.body: ; preds = %entry, %for.inc %i.09 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %cond, i64 %i.09 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %cond, i64 %i.09 + %0 = load i32, ptr %arrayidx, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body - %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %i.09 - %1 = load i32, i32* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.09 - %2 = load i32, i32* %arrayidx2, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %i.09 + %1 = load i32, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %i.09 + %2 = load i32, ptr %arrayidx2, align 4 %div = sdiv i32 %2, %1 - store i32 %div, i32* %arrayidx2, align 4 + store i32 %div, ptr %arrayidx2, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -56,7 +56,7 @@ for.inc: ; preds = %for.body, %if.then ; otherwise it could be able to vectorize, but will not because ; "Max legal vector width too small, scalable vectorization unfeasible.." 
-define void @unpredicated_loop_predication_through_tailfolding(i32* %a, i32* %b) #0 { +define void @unpredicated_loop_predication_through_tailfolding(ptr %a, ptr %b) #0 { ; CHECK-LABEL: @unpredicated_loop_predication_through_tailfolding ; CHECK-NOT: sdiv @@ -65,14 +65,14 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %sdiv = sdiv i32 %1, %0 %2 = add nuw nsw i64 %iv, 8 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %sdiv, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %sdiv, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll index bb11b2d..9989518 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll @@ -1,30 +1,30 @@ ; RUN: opt < %s -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ ; RUN: -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S | FileCheck %s -define void @invariant_store_red_exit_is_phi(i32* %dst, i32* readonly %src, i64 %n) { +define void @invariant_store_red_exit_is_phi(ptr %dst, ptr readonly %src, i64 %n) { ; CHECK-LABEL: @invariant_store_red_exit_is_phi( ; CHECK: vector.ph: ; CHECK: %[[ACTIVE_LANE_MASK_ENTRY:.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 %n) ; CHECK: vector.body: ; CHECK: %[[ACTIVE_LANE_MASK:.*]] = phi [ %[[ACTIVE_LANE_MASK_ENTRY]], %vector.ph ], [ %[[ACTIVE_LANE_MASK_NEXT:.*]], %vector.body ] ; CHECK: %[[VEC_PHI:.*]] = phi [ zeroinitializer, %vector.ph ], [ %[[PREDPHI:.*]], %vector.body ] -; CHECK: %[[LOAD:.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32 +; CHECK: %[[LOAD:.*]] = call @llvm.masked.load.nxv4i32.p0 ; CHECK-NEXT: %[[ADD:.*]] = add %[[VEC_PHI]], %[[LOAD]] ; CHECK-NEXT: %[[SELECT:.*]] = select %[[ACTIVE_LANE_MASK]], %[[ADD]], %[[VEC_PHI]] ; CHECK: %[[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 %{{.*}}, i64 %n) ; CHECK: middle.block: ; CHECK-NEXT: %[[SUM:.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( %[[SELECT]]) -; CHECK-NEXT: store i32 %[[SUM]], i32* %dst, align 4 +; CHECK-NEXT: store i32 %[[SUM]], ptr %dst, align 4 entry: br label %for.body for.body: ; preds = %entry, %for.inc %red = phi i32 [ 0, %entry ], [ %storemerge, %for.body ] %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx6 = getelementptr inbounds i32, i32* %src, i64 %indvars.iv - %load = load i32, i32* %arrayidx6, align 4 + %arrayidx6 = getelementptr inbounds i32, ptr %src, i64 %indvars.iv + %load = load i32, ptr %arrayidx6, align 4 %storemerge = add i32 %red, %load - store i32 %storemerge, i32* %dst, align 4 + store i32 %storemerge, ptr %dst, align 4 %indvars.iv.next = add nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.end.loopexit, label %for.body, !llvm.loop !0 diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll index 381efbd..fb8c752 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll @@ -7,7 +7,7 @@ ; ADD ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define i32 @add(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @add(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @add ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -23,8 +23,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi i32 [ 2, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %add = add nsw i32 %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -37,7 +37,7 @@ for.end: ; preds = %for.body, %entry ; OR ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define i32 @or(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @or ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -53,8 +53,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi i32 [ 2, %entry ], [ %or, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %or = or i32 %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -67,7 +67,7 @@ for.end: ; preds = %for.body, %entry ; AND ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define i32 @and(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @and(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @and ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -83,8 +83,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi i32 [ 2, %entry ], [ %and, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %and = and i32 %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -97,7 +97,7 @@ for.end: ; preds = %for.body, %entry ; XOR ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define i32 @xor(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @xor ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -113,8 +113,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi i32 [ 2, %entry ], [ %xor, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %xor = xor i32 %0, %sum.07 %iv.next = add 
nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -127,7 +127,7 @@ for.end: ; preds = %for.body, %entry ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) ; SMIN -define i32 @smin(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @smin ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -146,8 +146,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %cmp.i = icmp slt i32 %0, %sum.010 %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010 %iv.next = add nuw nsw i64 %iv, 1 @@ -161,7 +161,7 @@ for.end: ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) ; UMAX -define i32 @umax(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @umax ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -180,8 +180,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %cmp.i = icmp ugt i32 %0, %sum.010 %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010 %iv.next = add nuw nsw i64 %iv, 1 @@ -195,7 +195,7 @@ for.end: ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) ; FADD (FAST) -define float @fadd_fast(float* noalias nocapture readonly %a, i64 %n) { +define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-LABEL: @fadd_fast ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -211,8 +211,8 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + %0 = load float, ptr %arrayidx, align 4 %add = fadd fast float %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -224,7 +224,7 @@ for.end: ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. 
; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2) -define bfloat @fadd_fast_bfloat(bfloat* noalias nocapture readonly %a, i64 %n) { +define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-LABEL: @fadd_fast_bfloat ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load <8 x bfloat> @@ -240,8 +240,8 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds bfloat, bfloat* %a, i64 %iv - %0 = load bfloat, bfloat* %arrayidx, align 4 + %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv + %0 = load bfloat, ptr %arrayidx, align 4 %add = fadd fast bfloat %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -254,7 +254,7 @@ for.end: ; FMIN (FAST) ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define float @fmin_fast(float* noalias nocapture readonly %a, i64 %n) #0 { +define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-LABEL: @fmin_fast ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -273,8 +273,8 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + %0 = load float, ptr %arrayidx, align 4 %cmp.i = fcmp fast olt float %0, %sum.07 %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07 %iv.next = add nuw nsw i64 %iv, 1 @@ -288,7 +288,7 @@ for.end: ; FMAX (FAST) ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define float @fmax_fast(float* noalias nocapture readonly %a, i64 %n) #0 { +define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-LABEL: @fmax_fast ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -307,8 +307,8 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + %0 = load float, ptr %arrayidx, align 4 %cmp.i = fcmp fast ogt float %0, %sum.07 %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07 %iv.next = add nuw nsw i64 %iv, 1 @@ -322,7 +322,7 @@ for.end: ; ADD (with reduction stored in invariant address) ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 4, interleaved count: 2) -define void @invariant_store(i32* %dst, i32* readonly %src) { +define void @invariant_store(ptr %dst, ptr readonly %src) { ; CHECK-LABEL: @invariant_store ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -332,18 +332,18 @@ define void @invariant_store(i32* %dst, i32* readonly %src) { ; CHECK: middle.block: ; CHECK: %[[ADD:.*]] = add %[[ADD2]], %[[ADD1]] ; CHECK-NEXT: %[[SUM:.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( %[[ADD]]) -; CHECK-NEXT: store i32 %[[SUM]], i32* %gep.dst, align 4 +; CHECK-NEXT: store i32 %[[SUM]], ptr %gep.dst, align 4 entry: - %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42 - store i32 0, i32* %gep.dst, align 4 + %gep.dst = getelementptr inbounds i32, ptr %dst, i64 42 + store i32 0, ptr %gep.dst, align 4 br label %for.body for.body: %sum = phi i32 [ 0, %entry ], [ 
%add, %for.body ] %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gep.src = getelementptr inbounds i32, i32* %src, i64 %indvars.iv - %0 = load i32, i32* %gep.src, align 4 + %gep.src = getelementptr inbounds i32, ptr %src, i64 %indvars.iv + %0 = load i32, ptr %gep.src, align 4 %add = add nsw i32 %sum, %0 - store i32 %add, i32* %gep.dst, align 4 + store i32 %add, ptr %gep.dst, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -358,7 +358,7 @@ for.cond.cleanup: ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. ; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2) -define i32 @mul(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @mul ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load <4 x i32> @@ -374,8 +374,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi i32 [ 2, %entry ], [ %mul, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %mul = mul nsw i32 %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -388,7 +388,7 @@ for.end: ; preds = %for.body, %entry ; Note: This test was added to ensure we always check the legality of reductions (end emit a warning if necessary) before checking for memory dependencies ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. 
; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2) -define i32 @memory_dependence(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) { +define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @memory_dependence ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load <4 x i32> @@ -408,14 +408,14 @@ entry: for.body: %i = phi i64 [ %inc, %for.body ], [ 0, %entry ] %sum = phi i32 [ %mul, %for.body ], [ 2, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %i - %1 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %i + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %i + %1 = load i32, ptr %arrayidx1, align 4 %add = add nsw i32 %1, %0 %add2 = add nuw nsw i64 %i, 32 - %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %add2 - store i32 %add, i32* %arrayidx3, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %add2 + store i32 %add, ptr %arrayidx3, align 4 %mul = mul nsw i32 %1, %sum %inc = add nuw nsw i64 %i, 1 %exitcond.not = icmp eq i64 %inc, %n diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll index 2a8f6ed3d..f28f77b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll @@ -30,20 +30,20 @@ ; VF-4: <4 x i32> ; VF-VSCALE4: <16 x i32> -define void @test0(i32* %a, i8* %b, i32* %c) #0 { +define void @test0(ptr %a, ptr %b, ptr %c) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv - %1 = load i8, i8* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv + %1 = load i8, ptr %arrayidx2, align 4 %zext = zext i8 %1 to i32 %add = add nsw i32 %zext, %0 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll index 3628ab0..e83eb72 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll @@ -6,7 +6,7 @@ ; Test that the MaxVF for the following loop, that has no dependence distances, ; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16 ; (maximized bandwidth for i8 in the loop). 
-define void @test0(i32* %a, i8* %b, i32* %c) #0 { +define void @test0(ptr %a, ptr %b, ptr %c) #0 { ; CHECK: LV: Checking a loop in 'test0' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16 @@ -19,14 +19,14 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv - %1 = load i8, i8* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv + %1 = load i8, ptr %arrayidx2, align 4 %zext = zext i8 %1 to i32 %add = add nsw i32 %zext, %0 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop @@ -37,7 +37,7 @@ exit: ; Test that the MaxVF for the following loop, with a dependence distance ; of 64 elements, is calculated as (maxvscale = 16) * 4. -define void @test1(i32* %a, i8* %b) #0 { +define void @test1(ptr %a, ptr %b) #0 { ; CHECK: LV: Checking a loop in 'test1' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16 @@ -50,15 +50,15 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv - %1 = load i8, i8* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv + %1 = load i8, ptr %arrayidx2, align 4 %zext = zext i8 %1 to i32 %add = add nsw i32 %zext, %0 %2 = add nuw nsw i64 %iv, 64 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop @@ -69,7 +69,7 @@ exit: ; Test that the MaxVF for the following loop, with a dependence distance ; of 32 elements, is calculated as (maxvscale = 16) * 2. 
-define void @test2(i32* %a, i8* %b) #0 { +define void @test2(ptr %a, ptr %b) #0 { ; CHECK: LV: Checking a loop in 'test2' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16 @@ -82,15 +82,15 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv - %1 = load i8, i8* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv + %1 = load i8, ptr %arrayidx2, align 4 %zext = zext i8 %1 to i32 %add = add nsw i32 %zext, %0 %2 = add nuw nsw i64 %iv, 32 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop @@ -101,7 +101,7 @@ exit: ; Test that the MaxVF for the following loop, with a dependence distance ; of 16 elements, is calculated as (maxvscale = 16) * 1. -define void @test3(i32* %a, i8* %b) #0 { +define void @test3(ptr %a, ptr %b) #0 { ; CHECK: LV: Checking a loop in 'test3' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16 @@ -114,15 +114,15 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv - %1 = load i8, i8* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv + %1 = load i8, ptr %arrayidx2, align 4 %zext = zext i8 %1 to i32 %add = add nsw i32 %zext, %0 %2 = add nuw nsw i64 %iv, 16 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop @@ -133,7 +133,7 @@ exit: ; Test the fallback mechanism when scalable vectors are not feasible due ; to e.g. dependence distance. 
-define void @test4(i32* %a, i32* %b) #0 { +define void @test4(ptr %a, ptr %b) #0 { ; CHECK: LV: Checking a loop in 'test4' ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF @@ -147,14 +147,14 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 8 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll index b8a5d59..47b159b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll @@ -45,20 +45,20 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" ; CHECK-DBG: LV: Selecting VF: 4. ; CHECK-LABEL: @test1 ; CHECK: <4 x i32> -define void @test1(i32* %a, i32* %b) #0 { +define void @test1(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 8 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0 @@ -90,20 +90,20 @@ exit: ; CHECK-DBG: LV: Selecting VF: 4. 
; CHECK-LABEL: @test2 ; CHECK: <4 x i32> -define void @test2(i32* %a, i32* %b) #0 { +define void @test2(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 4 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !3 @@ -138,20 +138,20 @@ exit: ; CHECK-DBG: LV: Using user VF vscale x 2. ; CHECK-LABEL: @test3 ; CHECK: -define void @test3(i32* %a, i32* %b) #0 { +define void @test3(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 32 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !6 @@ -190,20 +190,20 @@ exit: ; CHECK-DBG: LV: Selecting VF: vscale x 2. 
; CHECK-LABEL: @test4 ; CHECK: -define void @test4(i32* %a, i32* %b) #0 { +define void @test4(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 32 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !9 @@ -238,20 +238,20 @@ exit: ; CHECK-DBG: LV: Using user VF vscale x 4 ; CHECK-LABEL: @test5 ; CHECK: -define void @test5(i32* %a, i32* %b) #0 { +define void @test5(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 128 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !12 @@ -289,20 +289,20 @@ exit: ; CHECK-DBG: Selecting VF: vscale x 4. 
; CHECK-LABEL: @test6 ; CHECK: -define void @test6(i32* %a, i32* %b) #0 { +define void @test6(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 128 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !15 @@ -322,18 +322,18 @@ exit: ; CHECK-NO-SVE-LABEL: @test_no_sve ; CHECK-NO-SVE: <4 x i32> ; CHECK-NO-SVE-NOT: -define void @test_no_sve(i32* %a, i32* %b) #0 { +define void @test_no_sve(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 - store i32 %add, i32* %arrayidx, align 4 + store i32 %add, ptr %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !18 @@ -356,20 +356,20 @@ exit: ; CHECK-DBG: LV: Selecting VF: 4. ; CHECK-LABEL: @test_no_max_vscale ; CHECK: <4 x i32> -define void @test_no_max_vscale(i32* %a, i32* %b) #0 { +define void @test_no_max_vscale(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 4 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !21 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll index c2581b9..bd3a01b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll @@ -16,17 +16,17 @@ target triple = "aarch64-unknown-linux-gnu" ; architecture with masked loads/stores, but we use SVE for testing purposes ; here. 
-define void @foo(i32* %data1, i32* %data2) { +define void @foo(ptr %data1, ptr %data2) { ; CHECK-LABEL: @foo( ; CHECK: vector.body: ; CHECK: br i1 {{%.*}}, label %pred.store.if, label %pred.store.continue ; CHECK: pred.store.if: -; CHECK-NEXT: store i32 {{%.*}}, i32* {{%.*}} +; CHECK-NEXT: store i32 {{%.*}}, ptr {{%.*}} ; CHECK-NEXT: br label %pred.store.continue ; CHECK: pred.store.continue: ; CHECK-NEXT: br i1 {{%.*}}, label %pred.store.if1, label %pred.store.continue2 ; CHECK: pred.store.if1: -; CHECK-NEXT: store i32 {{%.*}}, i32* {{%.*}} +; CHECK-NEXT: store i32 {{%.*}}, ptr {{%.*}} ; CHECK-NEXT: br label %pred.store.continue2 ; CHECK: pred.store.continue2: @@ -35,13 +35,13 @@ entry: while.body: %i = phi i64 [ 1023, %entry ], [ %i.next, %if.end ] - %arrayidx = getelementptr inbounds i32, i32* %data1, i64 %i - %ld = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %data1, i64 %i + %ld = load i32, ptr %arrayidx, align 4 %cmp = icmp sgt i32 %ld, %ld br i1 %cmp, label %if.then, label %if.end if.then: - store i32 %ld, i32* %arrayidx, align 4 + store i32 %ld, ptr %arrayidx, align 4 br label %if.end if.end: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll index a0ef4f4..38fedc1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll @@ -7,7 +7,7 @@ target triple = "aarch64--linux-gnu" @Foo = common global %struct.anon zeroinitializer, align 4 ; CHECK-LABEL: @foo( -; CHECK: load <4 x i32>, <4 x i32>* +; CHECK: load <4 x i32>, ptr ; CHECK: sdiv <4 x i32> ; CHECK: store <4 x i32> @@ -17,11 +17,11 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i64 0, i32 2, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 %div = sdiv i32 %0, 2 - %arrayidx2 = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv - store i32 %div, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds %struct.anon, ptr @Foo, i64 0, i32 0, i64 %indvars.iv + store i32 %div, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 100 br i1 %exitcond, label %for.end, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll index 604ac07..1cde8b9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios5.0.0" -define void @selects_1(i32* nocapture %dst, i32 %A, i32 %B, i32 %C, i32 %N) { +define void @selects_1(ptr nocapture %dst, i32 %A, i32 %B, i32 %C, i32 %N) { ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %cond = select i1 %cmp1, i32 10, i32 %and ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %cond6 = select i1 %cmp2, i32 30, i32 %and ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6 @@ -27,8 +27,8 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %indvars.iv 
= phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %dst, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 %and = and i32 %0, 2047 %cmp1 = icmp eq i32 %and, %A %cond = select i1 %cmp1, i32 10, i32 %and @@ -36,7 +36,7 @@ for.body: ; preds = %for.body.preheader, %cond6 = select i1 %cmp2, i32 30, i32 %and %cmp7 = icmp ugt i32 %cond, %C %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6 - store i32 %cond11, i32* %arrayidx, align 4 + store i32 %cond11, ptr %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll index 1052b7f..87347ef 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll @@ -7,23 +7,23 @@ target triple = "aarch64--linux-gnu" ; CHECK-LABEL: Checking a loop in 'interleaved_access' ; CHECK: The Smallest and Widest types: 64 / 64 bits ; -define void @interleaved_access(i8** %A, i64 %N) { +define void @interleaved_access(ptr %A, i64 %N) { for.ph: br label %for.body for.body: %i = phi i64 [ %i.next.3, %for.body ], [ 0, %for.ph ] - %tmp0 = getelementptr inbounds i8*, i8** %A, i64 %i - store i8* null, i8** %tmp0, align 8 + %tmp0 = getelementptr inbounds ptr, ptr %A, i64 %i + store ptr null, ptr %tmp0, align 8 %i.next.0 = add nuw nsw i64 %i, 1 - %tmp1 = getelementptr inbounds i8*, i8** %A, i64 %i.next.0 - store i8* null, i8** %tmp1, align 8 + %tmp1 = getelementptr inbounds ptr, ptr %A, i64 %i.next.0 + store ptr null, ptr %tmp1, align 8 %i.next.1 = add nsw i64 %i, 2 - %tmp2 = getelementptr inbounds i8*, i8** %A, i64 %i.next.1 - store i8* null, i8** %tmp2, align 8 + %tmp2 = getelementptr inbounds ptr, ptr %A, i64 %i.next.1 + store ptr null, ptr %tmp2, align 8 %i.next.2 = add nsw i64 %i, 3 - %tmp3 = getelementptr inbounds i8*, i8** %A, i64 %i.next.2 - store i8* null, i8** %tmp3, align 8 + %tmp3 = getelementptr inbounds ptr, ptr %A, i64 %i.next.2 + store ptr null, ptr %tmp3, align 8 %i.next.3 = add nsw i64 %i, 4 %cond = icmp slt i64 %i.next.3, %N br i1 %cond, label %for.body, label %for.end diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll index 16080e5..3127189 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll @@ -9,15 +9,15 @@ target triple="aarch64-unknown-linux-gnu" ; CHECK-VF4: Found an estimated cost of 21 for VF 4 For instruction: %add = fadd float %0, %sum.07 ; CHECK-VF8: Found an estimated cost of 42 for VF 8 For instruction: %add = fadd float %0, %sum.07 -define float @fadd_strict32(float* noalias nocapture readonly %a, i64 %n) { +define float @fadd_strict32(ptr noalias nocapture readonly %a, i64 %n) { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 
%iv + %0 = load float, ptr %arrayidx, align 4 %add = fadd float %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -31,15 +31,15 @@ for.end: ; CHECK-VF4: Found an estimated cost of 18 for VF 4 For instruction: %add = fadd double %0, %sum.07 ; CHECK-VF8: Found an estimated cost of 36 for VF 8 For instruction: %add = fadd double %0, %sum.07 -define double @fadd_strict64(double* noalias nocapture readonly %a, i64 %n) { +define double @fadd_strict64(ptr noalias nocapture readonly %a, i64 %n) { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds double, double* %a, i64 %iv - %0 = load double, double* %arrayidx, align 4 + %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv + %0 = load double, ptr %arrayidx, align 4 %add = fadd double %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -52,17 +52,17 @@ for.end: ; CHECK-VF4: Found an estimated cost of 23 for VF 4 For instruction: %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07) ; CHECK-VF8: Found an estimated cost of 46 for VF 8 For instruction: %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07) -define float @fmuladd_strict32(float* %a, float* %b, i64 %n) { +define float @fmuladd_strict32(ptr %a, ptr %b, i64 %n) { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %iv - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %b, i64 %iv - %1 = load float, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv + %1 = load float, ptr %arrayidx2, align 4 %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07) %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -77,17 +77,17 @@ declare float @llvm.fmuladd.f32(float, float, float) ; CHECK-VF4: Found an estimated cost of 22 for VF 4 For instruction: %muladd = tail call double @llvm.fmuladd.f64(double %0, double %1, double %sum.07) ; CHECK-VF8: Found an estimated cost of 44 for VF 8 For instruction: %muladd = tail call double @llvm.fmuladd.f64(double %0, double %1, double %sum.07) -define double @fmuladd_strict64(double* %a, double* %b, i64 %n) { +define double @fmuladd_strict64(ptr %a, ptr %b, i64 %n) { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi double [ 0.000000e+00, %entry ], [ %muladd, %for.body ] - %arrayidx = getelementptr inbounds double, double* %a, i64 %iv - %0 = load double, double* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds double, double* %b, i64 %iv - %1 = load double, double* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv + %0 = load double, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds double, ptr %b, i64 %iv + %1 = load double, ptr %arrayidx2, align 4 %muladd = tail call double @llvm.fmuladd.f64(double %0, double %1, double %sum.07) %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll index 679c9dc..a06c5a7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll @@ -6,7 +6,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-DEBUG: LV: Not interleaving scalar ordered reductions. -define void @foo(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %M, i64 %N) { +define void @foo(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %M, i64 %N) { ; CHECK-LABEL: @foo( ; CHECK-NOT: vector.body @@ -15,7 +15,7 @@ entry: for.body.us: ; preds = %entry, %for.cond3 %i.023.us = phi i64 [ %inc8.us, %for.cond3 ], [ 0, %entry ] - %arrayidx.us = getelementptr inbounds float, float* %dst, i64 %i.023.us + %arrayidx.us = getelementptr inbounds float, ptr %dst, i64 %i.023.us %mul.us = mul nsw i64 %i.023.us, %N br label %for.body3.us @@ -23,8 +23,8 @@ for.body3.us: ; preds = %for.body.us, %for.b %0 = phi float [ 0.000000e+00, %for.body.us ], [ %add6.us, %for.body3.us ] %j.021.us = phi i64 [ 0, %for.body.us ], [ %inc.us, %for.body3.us ] %add.us = add nsw i64 %j.021.us, %mul.us - %arrayidx4.us = getelementptr inbounds float, float* %src, i64 %add.us - %1 = load float, float* %arrayidx4.us, align 4 + %arrayidx4.us = getelementptr inbounds float, ptr %src, i64 %add.us + %1 = load float, ptr %arrayidx4.us, align 4 %add6.us = fadd float %1, %0 %inc.us = add nuw nsw i64 %j.021.us, 1 %exitcond.not = icmp eq i64 %inc.us, %N @@ -32,7 +32,7 @@ for.body3.us: ; preds = %for.body.us, %for.b for.cond3: ; preds = %for.body3.us %add6.us.lcssa = phi float [ %add6.us, %for.body3.us ] - store float %add6.us.lcssa, float* %arrayidx.us, align 4 + store float %add6.us.lcssa, ptr %arrayidx.us, align 4 %inc8.us = add nuw nsw i64 %i.023.us, 1 %exitcond26.not = icmp eq i64 %inc8.us, %M br i1 %exitcond26.not, label %exit, label %for.body.us diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll index a9b90f5..8ee0338 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll @@ -5,26 +5,26 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" -define void @cmpsel_i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) { +define void @cmpsel_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @cmpsel_i32( ; CHECK-NEXT: entry: ; CHECK: vector.body: -; CHECK: [[WIDE_LOAD:%.*]] = load , * {{.*}}, align 4 +; CHECK: [[WIDE_LOAD:%.*]] = load , ptr {{.*}}, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = select [[TMP1]], shufflevector ( insertelement ( poison, i32 2, i32 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, i32 10, i32 0), poison, zeroinitializer) -; CHECK: store [[TMP2]], * {{.*}}, align 4 +; CHECK: store [[TMP2]], ptr {{.*}}, align 4 ; entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 %tobool.not = icmp eq i32 %0, 0 %cond = select i1 %tobool.not, i32 2, i32 10 - %arrayidx2 = 
getelementptr inbounds i32, i32* %a, i64 %indvars.iv - store i32 %cond, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + store i32 %cond, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.end.loopexit, label %for.body, !llvm.loop !0 @@ -36,26 +36,26 @@ for.end: ; preds = %for.end.loopexit, % ret void } -define void @cmpsel_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) { +define void @cmpsel_f32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @cmpsel_f32( ; CHECK-NEXT: entry: ; CHECK: vector.body: -; CHECK: [[WIDE_LOAD:%.*]] = load , * {{.*}}, align 4 +; CHECK: [[WIDE_LOAD:%.*]] = load , ptr {{.*}}, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 3.000000e+00, i32 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP2:%.*]] = select [[TMP1]], shufflevector ( insertelement ( poison, float 1.000000e+01, i32 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, float 2.000000e+00, i32 0), poison, zeroinitializer) -; CHECK: store [[TMP2]], * {{.*}}, align 4 +; CHECK: store [[TMP2]], ptr {{.*}}, align 4 entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 %cmp1 = fcmp ogt float %0, 3.000000e+00 %conv = select i1 %cmp1, float 1.000000e+01, float 2.000000e+00 - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %indvars.iv - store float %conv, float* %arrayidx3, align 4 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %indvars.iv + store float %conv, ptr %arrayidx3, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -64,24 +64,24 @@ for.end: ; preds = %for.body, %entry ret void } -define void @fneg_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) { +define void @fneg_f32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @fneg_f32( ; CHECK-NEXT: entry: ; CHECK: vector.body: -; CHECK: [[WIDE_LOAD:%.*]] = load , * {{.*}}, align 4 +; CHECK: [[WIDE_LOAD:%.*]] = load , ptr {{.*}}, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = fneg [[WIDE_LOAD]] -; CHECK: store [[TMP1]], * {{.*}}, align 4 +; CHECK: store [[TMP1]], ptr {{.*}}, align 4 entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 %fneg = fneg float %0 - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %indvars.iv - store float %fneg, float* %arrayidx3, align 4 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %indvars.iv + store float %fneg, ptr %arrayidx3, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll index 6d1bb88..a5ebd00 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll @@ -2,7 +2,7 @@ ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -mattr=+sve \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S %s -o - | FileCheck %s -define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) #0 { +define void @cond_inv_load_i32i32i16(ptr noalias nocapture %a, ptr noalias nocapture readonly %cond, ptr noalias nocapture readonly %inv, i64 %n) #0 { ; CHECK-LABEL: @cond_inv_load_i32i32i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -14,20 +14,18 @@ define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias noc ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16* [[INV:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0i16( [[BROADCAST_SPLAT]], i32 2, [[TMP6]], poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[BROADCAST_SPLAT]], i32 2, [[TMP6]], poison) ; CHECK-NEXT: [[TMP7:%.*]] = sext [[WIDE_MASKED_GATHER]] to -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[TMP7]], * [[TMP9]], i32 4, [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP7]], ptr [[TMP8]], i32 4, [[TMP6]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] @@ -41,15 +39,15 @@ define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias noc ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[COND]], i64 [[I_07]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[I_07]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP13]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP14:%.*]] = load i16, i16* [[INV]], align 2 +; CHECK-NEXT: [[TMP14:%.*]] = load i16, ptr [[INV]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_07]] -; CHECK-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_07]] +; CHECK-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1 @@ -63,16 +61,16 @@ entry: for.body: ; preds = %entry, %for.inc %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %cond, i64 %i.07 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %cond, i64 %i.07 + %0 = load i32, ptr %arrayidx, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body - %1 = load i16, i16* %inv, align 2 + %1 = load i16, ptr %inv, align 2 %conv = sext i16 %1 to i32 - %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %i.07 - store i32 %conv, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %i.07 + store i32 %conv, ptr %arrayidx1, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -84,7 +82,7 @@ exit: ; preds = %for.inc ret void } -define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) #0 { +define void @cond_inv_load_f64f64f64(ptr noalias nocapture %a, ptr noalias nocapture readonly %cond, ptr noalias nocapture readonly %inv, i64 %n) #0 { ; CHECK-LABEL: @cond_inv_load_f64f64f64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -96,19 +94,17 @@ define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noali ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double* [[INV:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, double* [[COND:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP4]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 4.000000e-01, i32 0), poison, zeroinitializer) -; CHECK-NEXT: 
[[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f64.nxv4p0f64( [[BROADCAST_SPLAT]], i32 8, [[TMP6]], poison) -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4f64.p0nxv4f64( [[WIDE_MASKED_GATHER]], * [[TMP8]], i32 8, [[TMP6]]) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f64.nxv4p0( [[BROADCAST_SPLAT]], i32 8, [[TMP6]], poison) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4f64.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP7]], i32 8, [[TMP6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] @@ -122,14 +118,14 @@ define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noali ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[COND]], i64 [[I_08]] -; CHECK-NEXT: [[TMP12:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[COND]], i64 [[I_08]] +; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[TMP12]], 4.000000e-01 ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP13:%.*]] = load double, double* [[INV]], align 8 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[I_08]] -; CHECK-NEXT: store double [[TMP13]], double* [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[INV]], align 8 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]] +; CHECK-NEXT: store double [[TMP13]], ptr [[ARRAYIDX2]], align 8 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 @@ -143,15 +139,15 @@ entry: for.body: ; preds = %entry, %for.inc %i.08 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds double, double* %cond, i64 %i.08 - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %cond, i64 %i.08 + %0 = load double, ptr %arrayidx, align 8 %cmp1 = fcmp ogt double %0, 4.000000e-01 br i1 %cmp1, label %if.then, label %for.inc if.then: ; preds = %for.body - %1 = load double, double* %inv, align 8 - %arrayidx2 = getelementptr inbounds double, double* %a, i64 %i.08 - store double %1, double* %arrayidx2, align 8 + %1 = load double, ptr %inv, align 8 + %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %i.08 + store double %1, ptr %arrayidx2, align 8 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -163,7 +159,7 @@ exit: ; preds = %for.inc ret void } -define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) #0 { +define void @invariant_load_cond(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %cond, i64 %n) #0 { ; CHECK-LABEL: @invariant_load_cond( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -178,21 +174,18 @@ define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture reado ; 
CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 42 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32* [[TMP4]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP6]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 42 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, ptr [[TMP4]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to * -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP9]], i32 4, [[TMP7]], poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0i32( [[DOTSPLAT]], i32 4, [[TMP7]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, [[TMP7]], poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[DOTSPLAT]], i32 4, [[TMP7]], poison) ; CHECK-NEXT: [[TMP10:%.*]] = add nsw [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[TMP10]], * [[TMP12]], i32 4, [[TMP7]]) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP10]], ptr [[TMP11]], i32 4, [[TMP7]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] @@ -206,18 +199,18 @@ define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture reado ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[COND]], i64 [[IV]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP16]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 42 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[IV]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: 
[[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 42 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]] -; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX4]], align 4 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 @@ -231,19 +224,19 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] - %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 42 - %arrayidx2 = getelementptr inbounds i32, i32* %cond, i64 %iv - %0 = load i32, i32* %arrayidx2, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 42 + %arrayidx2 = getelementptr inbounds i32, ptr %cond, i64 %iv + %0 = load i32, ptr %arrayidx2, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: - %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx3, align 4 - %2 = load i32, i32* %arrayidx1, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx3, align 4 + %2 = load i32, ptr %arrayidx1, align 4 %add = add nsw i32 %2, %1 - %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %iv - store i32 %add, i32* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 br label %for.inc for.inc: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll index 64a0d7d..3fc1a15 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll @@ -5,19 +5,19 @@ target triple="aarch64--linux-gnu" ; CHECK: LV: Checking a loop in 'gather_nxv4i32_loaded_index' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %1 = load float, float* %arrayidx3, align 4 -define void @gather_nxv4i32_loaded_index(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx3, align 4 +define void @gather_nxv4i32_loaded_index(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %b, i64 %indvars.iv - %0 = load i64, i64* %arrayidx, align 8 - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %0 - %1 = load float, float* %arrayidx3, align 4 - %arrayidx5 = getelementptr inbounds float, float* %c, i64 %indvars.iv - store float %1, float* %arrayidx5, align 4 + %arrayidx = getelementptr inbounds i64, ptr %b, i64 %indvars.iv + %0 = load i64, ptr %arrayidx, align 8 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %0 + %1 = load float, ptr %arrayidx3, align 4 + %arrayidx5 = getelementptr inbounds float, ptr %c, i64 %indvars.iv + store 
float %1, ptr %arrayidx5, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -27,19 +27,19 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo } ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_loaded_index' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %1, float* %arrayidx5, align 4 -define void @scatter_nxv4i32_loaded_index(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %1, ptr %arrayidx5, align 4 +define void @scatter_nxv4i32_loaded_index(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %b, i64 %indvars.iv - %0 = load i64, i64* %arrayidx, align 8 - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %indvars.iv - %1 = load float, float* %arrayidx3, align 4 - %arrayidx5 = getelementptr inbounds float, float* %c, i64 %0 - store float %1, float* %arrayidx5, align 4 + %arrayidx = getelementptr inbounds i64, ptr %b, i64 %indvars.iv + %0 = load i64, ptr %arrayidx, align 8 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %indvars.iv + %1 = load float, ptr %arrayidx3, align 4 + %arrayidx5 = getelementptr inbounds float, ptr %c, i64 %0 + store float %1, ptr %arrayidx5, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -51,18 +51,18 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo ; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks ; to ensure the stride value is always 1. Therefore, it can assume a contiguous load and a cost of 1. 
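; NOTE: The standalone snippet below is only an illustration and is not one of
; the tests in this file; the function and value names are hypothetical. It
; sketches the contiguous form the vectoriser can assume once the versioned
; loop has passed the stride==1 SCEV check described above: a single
; scalable-vector load and store (cost 1 each) instead of a gather/scatter,
; written with opaque pointers.
define void @stride1_copy_sketch(ptr noalias %dst, ptr noalias %src, i64 %index) {
entry:
  %src.gep = getelementptr inbounds float, ptr %src, i64 %index
  ; contiguous scalable load: one wide load rather than a masked gather
  %vec = load <vscale x 4 x float>, ptr %src.gep, align 4
  %dst.gep = getelementptr inbounds float, ptr %dst, i64 %index
  ; contiguous scalable store: one wide store rather than a masked scatter
  store <vscale x 4 x float> %vec, ptr %dst.gep, align 4
  ret void
}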
; CHECK: LV: Checking a loop in 'gather_nxv4i32_unknown_stride' -; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %0 = load float, float* %arrayidx, align 4 -define void @gather_nxv4i32_unknown_stride(float* noalias nocapture readonly %a, float* noalias nocapture %b, i64 %stride, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4 +define void @gather_nxv4i32_unknown_stride(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %stride, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, %stride - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2 - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -74,18 +74,18 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo ; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks ; to ensure the stride value is always 1. Therefore, it can assume a contiguous load and cost is 1. ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_unknown_stride' -; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: store float %0, float* %arrayidx2, align 4 -define void @scatter_nxv4i32_unknown_stride(float* noalias nocapture readonly %a, float* noalias nocapture %b, i64 %stride, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4 +define void @scatter_nxv4i32_unknown_stride(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %stride, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, %stride - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv.stride2 - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv.stride2 + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -95,18 +95,18 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo } ; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride2' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, float* %arrayidx, align 4 -define void @gather_nxv4i32_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, 
align 4 +define void @gather_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, 2 - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2 - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -116,18 +116,18 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo } ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride2' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, float* %arrayidx2, align 4 -define void @scatter_nxv4i32_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4 +define void @scatter_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, 2 - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv.stride2 - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv.stride2 + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -138,18 +138,18 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo ; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride64' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, float* %arrayidx, align 4 -define void @gather_nxv4i32_stride64(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4 +define void @gather_nxv4i32_stride64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, 64 - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2 - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2 + %0 = load float, ptr %arrayidx, align 4 + 
%arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -159,18 +159,18 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo } ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride64' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, float* %arrayidx2, align 4 -define void @scatter_nxv4i32_stride64(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4 +define void @scatter_nxv4i32_stride64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, 64 - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv.stride2 - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv.stride2 + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll index 14db7f2..7a630fe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll @@ -2,7 +2,7 @@ ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -mattr=+sve \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S %s -force-target-instruction-cost=1 -o - | FileCheck %s -define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 { +define void @gather_nxv4i32_ind64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 { ; CHECK-LABEL: @gather_nxv4i32_ind64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -17,14 +17,12 @@ define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noa ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], [[WIDE_LOAD]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0f32( [[TMP6]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: 
[[TMP8:%.*]] = bitcast float* [[TMP7]] to * -; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], * [[TMP8]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[WIDE_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP6]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] @@ -38,12 +36,12 @@ define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noa ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[TMP13]], float* [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[TMP13]], ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -55,12 +53,12 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %b, i64 %indvars.iv - %0 = load i64, i64* %arrayidx, align 8 - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %0 - %1 = load float, float* %arrayidx3, align 4 - %arrayidx5 = getelementptr inbounds float, float* %c, i64 %indvars.iv - store float %1, float* %arrayidx5, align 4 + %arrayidx = getelementptr inbounds i64, ptr %b, i64 %indvars.iv + %0 = load i64, ptr %arrayidx, align 8 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %0 + %1 = load float, ptr %arrayidx3, align 4 + %arrayidx5 = getelementptr inbounds float, ptr %c, i64 %indvars.iv + store float %1, ptr %arrayidx5, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -72,7 +70,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo ; NOTE: I deliberately chose '%b' as an array of i32 indices, since the ; additional 'sext' in the for.body loop exposes 
additional code paths ; during vectorisation. -define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) #0 { +define void @scatter_nxv4i32_ind32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i64 %n) #0 { ; CHECK-LABEL: @scatter_nxv4i32_ind32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -87,15 +85,13 @@ define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias noc ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to * -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , * [[TMP7]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = sext [[WIDE_LOAD1]] to -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A:%.*]], [[TMP8]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( [[WIDE_LOAD]], [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[TMP8]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[WIDE_LOAD]], [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer)) ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] @@ -109,13 +105,13 @@ define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias noc ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 ; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IDXPROM4]] -; CHECK-NEXT: store float [[TMP13]], float* [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IDXPROM4]] +; CHECK-NEXT: store float [[TMP13]], ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: 
[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -127,13 +123,13 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, float* %c, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv - %1 = load i32, i32* %arrayidx3, align 4 + %arrayidx = getelementptr inbounds float, ptr %c, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv + %1 = load i32, ptr %arrayidx3, align 4 %idxprom4 = sext i32 %1 to i64 - %arrayidx5 = getelementptr inbounds float, float* %a, i64 %idxprom4 - store float %0, float* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %idxprom4 + store float %0, ptr %arrayidx5, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -142,7 +138,7 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void } -define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) #0 { +define void @scatter_inv_nxv4i32(ptr noalias nocapture %inv, ptr noalias nocapture readonly %b, i64 %n) #0 { ; CHECK-LABEL: @scatter_inv_nxv4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -154,16 +150,15 @@ define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocap ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32* [[INV:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( shufflevector ( insertelement ( poison, i32 3, i32 0), poison, zeroinitializer), [[BROADCAST_SPLAT]], i32 4, [[TMP6]]) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( shufflevector ( insertelement ( poison, i32 3, i32 0), poison, zeroinitializer), [[BROADCAST_SPLAT]], i32 4, [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] @@ -177,12 +172,12 @@ define void @scatter_inv_nxv4i32(i32* noalias 
nocapture %inv, i32* noalias nocap ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP10]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: store i32 3, i32* [[INV]], align 4 +; CHECK-NEXT: store i32 3, ptr [[INV]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -196,13 +191,13 @@ entry: for.body: ; preds = %entry, %for.inc %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body - store i32 3, i32* %inv, align 4 + store i32 3, ptr %inv, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -214,7 +209,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo ret void } -define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) #0 { +define void @gather_inv_nxv4i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %inv, i64 %n) #0 { ; CHECK-LABEL: @gather_inv_nxv4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -226,18 +221,16 @@ define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocaptur ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32* [[INV:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0i32( [[BROADCAST_SPLAT]], i32 4, [[TMP6]], poison) -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP4]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[WIDE_MASKED_GATHER]], * [[TMP7]], i32 4, [[TMP6]]) +; CHECK-NEXT: 
[[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP6]], poison) +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP4]], i32 4, [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] @@ -251,13 +244,13 @@ define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocaptur ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[TMP11]], 3 ; CHECK-NEXT: br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[INV]], align 4 -; CHECK-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[INV]], align 4 +; CHECK-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -271,14 +264,14 @@ entry: for.body: ; preds = %entry, %for.inc %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 %cmp2 = icmp sgt i32 %0, 3 br i1 %cmp2, label %if.then, label %for.inc if.then: ; preds = %for.body - %1 = load i32, i32* %inv, align 4 - store i32 %1, i32* %arrayidx, align 4 + %1 = load i32, ptr %inv, align 4 + store i32 %1, ptr %arrayidx, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -292,7 +285,7 @@ for.cond.cleanup: ; preds = %for.inc, %entry -define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) #0 { +define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 { ; CHECK-LABEL: @gather_nxv4i32_ind64_stride2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -316,19 +309,17 @@ define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture %a, float* no ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = shl [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP8:%.*]] = shl [[STEP_ADD]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[B:%.*]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[B]], [[TMP8]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0f32( [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0f32( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, 
i32 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast float* [[TMP11]] to * -; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], * [[TMP12]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], [[TMP8]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i32 [[TMP13]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to * -; CHECK-NEXT: store [[WIDE_MASKED_GATHER2]], * [[TMP17]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP15]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER2]], ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 3 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] @@ -344,10 +335,10 @@ define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture %a, float* no ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INDVARS_IV_STRIDE2:%.*]] = shl i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV_STRIDE2]] -; CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[TMP21]], float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV_STRIDE2]] +; CHECK-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[TMP21]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -360,10 +351,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, 2 - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2 - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 
%indvars.iv + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll index 84b1391..739010b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll @@ -11,7 +11,7 @@ target triple = "aarch64-linux-gnu" ; a[i] = b[i]; ; } -define void @cond_ind64(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) #0 { +define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 { ; CHECK-LABEL: @cond_ind64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -33,12 +33,10 @@ define void @cond_ind64(i32* noalias nocapture %a, i32* noalias nocapture readon ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = trunc [[VEC_IND]] to -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to * -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP9]], i32 4, [[TMP7]], poison) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[WIDE_MASKED_LOAD]], * [[TMP11]], i32 4, [[TMP7]]) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, [[TMP7]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP10]], i32 4, [[TMP7]]) ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] @@ -57,10 +55,10 @@ define void @cond_ind64(i32* noalias nocapture %a, i32* noalias nocapture readon ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[AND]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_08]] -; CHECK-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_08]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_08]] +; CHECK-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 @@ -79,10 +77,10 @@ for.body: ; preds = %entry, %for.inc br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body - %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.08 - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr 
inbounds i32, i32* %a, i64 %i.08 - store i32 %0, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.08 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %i.08 + store i32 %0, ptr %arrayidx1, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll index 90a7b10..05a6ebe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll @@ -1,28 +1,28 @@ ; RUN: opt -S -passes=loop-vectorize -mattr=+sve -mtriple aarch64-linux-gnu \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s -define void @invariant_load(i64 %n, i32* noalias nocapture %a, i32* nocapture readonly %b) { +define void @invariant_load(i64 %n, ptr noalias nocapture %a, ptr nocapture readonly %b) { ; CHECK-LABEL: @invariant_load ; CHECK: vector.body: -; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42 -; CHECK-NEXT: %[[INVLOAD:.*]] = load i32, i32* %[[GEP]] +; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, ptr %b, i64 42 +; CHECK-NEXT: %[[INVLOAD:.*]] = load i32, ptr %[[GEP]] ; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement poison, i32 %[[INVLOAD]], i32 0 ; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector %[[SPLATINS]], poison, zeroinitializer -; CHECK: %[[LOAD:.*]] = load , * +; CHECK: %[[LOAD:.*]] = load , ptr ; CHECK-NEXT: %[[ADD:.*]] = add nsw %[[SPLAT]], %[[LOAD]] -; CHECK: store %[[ADD]], * +; CHECK: store %[[ADD]], ptr entry: br label %for.body for.body: ; preds = %for.body.lr.ph, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %b, i64 42 - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 42 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx1, align 4 %add = add nsw i32 %0, %1 - %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %iv - store i32 %add, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx2, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll index 51fc8f9..31abba4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll @@ -1,25 +1,25 @@ ; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -passes=loop-vectorize,dce,instcombine -S \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue <%s | FileCheck %s -define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) #0 { +define void @stride7_i32(ptr noalias nocapture %dst, i64 %n) #0 { ; CHECK-LABEL: @stride7_i32( ; CHECK: vector.body ; CHECK: %[[VEC_IND:.*]] = phi [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ] ; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw %[[VEC_IND]], shufflevector ( insertelement ( poison, i64 7, i32 0), poison, zeroinitializer) -; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds i32, i32* %dst, %[[PTR_INDICES]] -; CHECK-NEXT: %[[GLOAD:.*]] 
= call @llvm.masked.gather.nxv4i32.nxv4p0i32( %[[PTRS]] +; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds i32, ptr %dst, %[[PTR_INDICES]] +; CHECK-NEXT: %[[GLOAD:.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( %[[PTRS]] ; CHECK-NEXT: %[[VALS:.*]] = add nsw %[[GLOAD]], -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( %[[VALS]], %[[PTRS]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( %[[VALS]], %[[PTRS]] entry: br label %for.body for.body: ; preds = %entry, %for.body %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ] %mul = mul nuw nsw i64 %i.05, 7 - %arrayidx = getelementptr inbounds i32, i32* %dst, i64 %mul - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %dst, i64 %mul + %0 = load i32, ptr %arrayidx, align 4 %add = add nsw i32 %0, 3 - store i32 %add, i32* %arrayidx, align 4 + store i32 %add, ptr %arrayidx, align 4 %inc = add nuw nsw i64 %i.05, 1 %exitcond.not = icmp eq i64 %inc, %n br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -28,25 +28,25 @@ for.end: ; preds = %for.end.loopexit, % ret void } -define void @stride7_f64(double* noalias nocapture %dst, i64 %n) #0 { +define void @stride7_f64(ptr noalias nocapture %dst, i64 %n) #0 { ; CHECK-LABEL: @stride7_f64( ; CHECK: vector.body ; CHECK: %[[VEC_IND:.*]] = phi [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ] ; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw %[[VEC_IND]], shufflevector ( insertelement ( poison, i64 7, i32 0), poison, zeroinitializer) -; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, %[[PTR_INDICES]] -; CHECK-NEXT: %[[GLOAD:.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0f64( %[[PTRS]], +; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds double, ptr %dst, %[[PTR_INDICES]] +; CHECK-NEXT: %[[GLOAD:.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( %[[PTRS]], ; CHECK-NEXT: %[[VALS:.*]] = fadd %[[GLOAD]], -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( %[[VALS]], %[[PTRS]], +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( %[[VALS]], %[[PTRS]], entry: br label %for.body for.body: ; preds = %entry, %for.body %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ] %mul = mul nuw nsw i64 %i.05, 7 - %arrayidx = getelementptr inbounds double, double* %dst, i64 %mul - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %dst, i64 %mul + %0 = load double, ptr %arrayidx, align 8 %add = fadd double %0, 1.000000e+00 - store double %add, double* %arrayidx, align 8 + store double %add, ptr %arrayidx, align 8 %inc = add nuw nsw i64 %i.05, 1 %exitcond.not = icmp eq i64 %inc, %n br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6 @@ -56,30 +56,30 @@ for.end: ; preds = %for.end.loopexit, % } -define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) #0 { +define void @cond_stride7_f64(ptr noalias nocapture %dst, ptr noalias nocapture readonly %cond, i64 %n) #0 { ; CHECK-LABEL: @cond_stride7_f64( ; CHECK: vector.body ; CHECK: %[[MASK:.*]] = icmp ne -; CHECK: %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, %{{.*}} -; CHECK-NEXT: %[[GLOAD:.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0f64( %[[PTRS]], i32 8, %[[MASK]] +; CHECK: %[[PTRS:.*]] = getelementptr inbounds double, ptr %dst, %{{.*}} +; CHECK-NEXT: %[[GLOAD:.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( %[[PTRS]], i32 8, %[[MASK]] ; CHECK-NEXT: %[[VALS:.*]] = fadd %[[GLOAD]], -; CHECK-NEXT: call void 
@llvm.masked.scatter.nxv2f64.nxv2p0f64( %[[VALS]], %[[PTRS]], i32 8, %[[MASK]]) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( %[[VALS]], %[[PTRS]], i32 8, %[[MASK]]) entry: br label %for.body for.body: ; preds = %entry, %for.inc %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %cond, i64 %i.07 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %cond, i64 %i.07 + %0 = load i64, ptr %arrayidx, align 8 %tobool.not = icmp eq i64 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body %mul = mul nsw i64 %i.07, 7 - %arrayidx1 = getelementptr inbounds double, double* %dst, i64 %mul - %1 = load double, double* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds double, ptr %dst, i64 %mul + %1 = load double, ptr %arrayidx1, align 8 %add = fadd double %1, 1.000000e+00 - store double %add, double* %arrayidx1, align 8 + store double %add, ptr %arrayidx1, align 8 br label %for.inc for.inc: ; preds = %for.body, %if.then diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll index eac9701..5a87b3b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll @@ -2,14 +2,14 @@ target triple = "aarch64-unknown-linux-gnu" -define void @trip7_i64(i64* noalias nocapture noundef %dst, i64* noalias nocapture noundef readonly %src) #0 { +define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 { ; CHECK-LABEL: @trip7_i64( ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK: [[ACTIVE_LANE_MASK:%.*]] = phi [ {{%.*}}, %vector.ph ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %vector.body ] -; CHECK: {{%.*}} = call @llvm.masked.load.nxv2i64.p0nxv2i64(* {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK: {{%.*}} = call @llvm.masked.load.nxv2i64.p0nxv2i64(* {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK: call void @llvm.masked.store.nxv2i64.p0nxv2i64( {{%.*}}, * {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK: {{%.*}} = call @llvm.masked.load.nxv2i64.p0(ptr {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK: {{%.*}} = call @llvm.masked.load.nxv2i64.p0(ptr {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK: call void @llvm.masked.store.nxv2i64.p0( {{%.*}}, ptr {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]]) ; CHECK: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[VF:%.*]] = mul i64 [[VSCALE]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[VF]] @@ -23,13 +23,13 @@ entry: for.body: ; preds = %entry, %for.body %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %arrayidx = getelementptr inbounds i64, i64* %src, i64 %i.06 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.06 + %0 = load i64, ptr %arrayidx, align 8 %mul = shl nsw i64 %0, 1 - %arrayidx1 = getelementptr inbounds i64, i64* %dst, i64 %i.06 - %1 = load i64, i64* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds i64, ptr %dst, i64 %i.06 + %1 = load i64, ptr %arrayidx1, align 8 %add = add nsw i64 %1, %mul - store i64 %add, i64* %arrayidx1, align 8 + store i64 %add, ptr %arrayidx1, align 8 %inc = add nuw nsw i64 %i.06, 1 %exitcond.not = icmp eq i64 %inc, 7 br i1 %exitcond.not, label %for.end, label %for.body @@ -38,19 +38,19 @@ for.end: ; preds = %for.body ret 
void } -define void @trip5_i8(i8* noalias nocapture noundef %dst, i8* noalias nocapture noundef readonly %src) #0 { +define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 { ; CHECK-LABEL: @trip5_i8( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[I_08]] -; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[I_08]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP0]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DST:%.*]], i64 [[I_08]] -; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[I_08]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP1]] -; CHECK-NEXT: store i8 [[ADD]], i8* [[ARRAYIDX1]], align 1 +; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] @@ -62,13 +62,13 @@ entry: for.body: ; preds = %entry, %for.body %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %arrayidx = getelementptr inbounds i8, i8* %src, i64 %i.08 - %0 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08 + %0 = load i8, ptr %arrayidx, align 1 %mul = shl i8 %0, 1 - %arrayidx1 = getelementptr inbounds i8, i8* %dst, i64 %i.08 - %1 = load i8, i8* %arrayidx1, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08 + %1 = load i8, ptr %arrayidx1, align 1 %add = add i8 %mul, %1 - store i8 %add, i8* %arrayidx1, align 1 + store i8 %add, ptr %arrayidx1, align 1 %inc = add nuw nsw i64 %i.08, 1 %exitcond.not = icmp eq i64 %inc, 5 br i1 %exitcond.not, label %for.end, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll index bd1e289..820fd88 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll @@ -1,32 +1,30 @@ ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -mattr=+sve \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S %s -o - | FileCheck %s -define void @mloadstore_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) { +define void @mloadstore_f32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @mloadstore_f32 ; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load , * +; CHECK: %[[LOAD1:.*]] = load , ptr ; CHECK-NEXT: %[[MASK:.*]] = fcmp ogt %[[LOAD1]], -; CHECK-NEXT: %[[GEPA:.*]] = getelementptr float, float* %a, -; CHECK-NEXT: %[[MLOAD_PTRS:.*]] = bitcast float* %[[GEPA]] to * -; CHECK-NEXT: %[[LOAD2:.*]] = call @llvm.masked.load.nxv4f32.p0nxv4f32(* %[[MLOAD_PTRS]], i32 4, %[[MASK]] +; CHECK-NEXT: %[[GEPA:.*]] = getelementptr float, ptr %a, +; CHECK-NEXT: %[[LOAD2:.*]] = call @llvm.masked.load.nxv4f32.p0(ptr %[[GEPA]], i32 4, %[[MASK]] ; CHECK-NEXT: %[[FADD:.*]] = fadd 
%[[LOAD1]], %[[LOAD2]] -; CHECK-NEXT: %[[MSTORE_PTRS:.*]] = bitcast float* %[[GEPA]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0nxv4f32( %[[FADD]], * %[[MSTORE_PTRS]], i32 4, %[[MASK]]) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0( %[[FADD]], ptr %[[GEPA]], i32 4, %[[MASK]]) entry: br label %for.body for.body: ; preds = %entry, %for.inc %i.011 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, float* %b, i64 %i.011 - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %i.011 + %0 = load float, ptr %arrayidx, align 4 %cmp1 = fcmp ogt float %0, 0.000000e+00 br i1 %cmp1, label %if.then, label %for.inc if.then: ; preds = %for.body - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.011 - %1 = load float, float* %arrayidx3, align 4 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %i.011 + %1 = load float, ptr %arrayidx3, align 4 %add = fadd float %0, %1 - store float %add, float* %arrayidx3, align 4 + store float %add, ptr %arrayidx3, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -38,32 +36,30 @@ exit: ; preds = %for.inc ret void } -define void @mloadstore_i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) { +define void @mloadstore_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @mloadstore_i32 ; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load , * +; CHECK: %[[LOAD1:.*]] = load , ptr ; CHECK-NEXT: %[[MASK:.*]] = icmp ne %[[LOAD1]], -; CHECK-NEXT: %[[GEPA:.*]] = getelementptr i32, i32* %a, -; CHECK-NEXT: %[[MLOAD_PTRS:.*]] = bitcast i32* %[[GEPA]] to * -; CHECK-NEXT: %[[LOAD2:.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* %[[MLOAD_PTRS]], i32 4, %[[MASK]] +; CHECK-NEXT: %[[GEPA:.*]] = getelementptr i32, ptr %a, +; CHECK-NEXT: %[[LOAD2:.*]] = call @llvm.masked.load.nxv4i32.p0(ptr %[[GEPA]], i32 4, %[[MASK]] ; CHECK-NEXT: %[[FADD:.*]] = add %[[LOAD1]], %[[LOAD2]] -; CHECK-NEXT: %[[MSTORE_PTRS:.*]] = bitcast i32* %[[GEPA]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( %[[FADD]], * %[[MSTORE_PTRS]], i32 4, %[[MASK]]) +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( %[[FADD]], ptr %[[GEPA]], i32 4, %[[MASK]]) entry: br label %for.body for.body: ; preds = %entry, %for.inc %i.011 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.011 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.011 + %0 = load i32, ptr %arrayidx, align 4 %cmp1 = icmp ne i32 %0, 0 br i1 %cmp1, label %if.then, label %for.inc if.then: ; preds = %for.body - %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %i.011 - %1 = load i32, i32* %arrayidx3, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %i.011 + %1 = load i32, ptr %arrayidx3, align 4 %add = add i32 %0, %1 - store i32 %add, i32* %arrayidx3, align 4 + store i32 %add, ptr %arrayidx3, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll index 4296b46..4304105 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll @@ -20,7 +20,7 @@ ; CHECK-LABEL: @scalable_load_in_loop ; CHECK-NOT: vector.body -define void @scalable_load_in_loop(i64 %n, * %x, * 
%y) { +define void @scalable_load_in_loop(i64 %n, ptr %x, ptr %y) { entry: br label %for.body @@ -31,8 +31,8 @@ for.body: br i1 %cmp, label %for.inc, label %if.end if.end: - %0 = load , * %y - store %0, * %x + %0 = load , ptr %y + store %0, ptr %x br label %for.inc for.inc: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll index f960164..cf5fdc4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll @@ -5,7 +5,7 @@ target triple = "aarch64-linux-gnu" -define i32 @select_const_i32_from_icmp(i32* nocapture readonly %v, i64 %n) #0 { +define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_const_i32_from_icmp ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 3, i32 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] @@ -47,8 +47,8 @@ entry: for.body: ; preds = %entry, %for.body %0 = phi i64 [ 0, %entry ], [ %6, %for.body ] %1 = phi i32 [ 3, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds i32, i32* %v, i64 %0 - %3 = load i32, i32* %2, align 4 + %2 = getelementptr inbounds i32, ptr %v, i64 %0 + %3 = load i32, ptr %2, align 4 %4 = icmp eq i32 %3, 3 %5 = select i1 %4, i32 %1, i32 7 %6 = add nuw nsw i64 %0, 1 @@ -59,7 +59,7 @@ exit: ; preds = %for.body ret i32 %5 } -define i32 @select_i32_from_icmp(i32* nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 { +define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_i32_from_icmp ; CHECK-VF4IC1: vector.ph: ; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement poison, i32 %a, i32 0 @@ -86,8 +86,8 @@ entry: for.body: ; preds = %entry, %for.body %0 = phi i64 [ 0, %entry ], [ %6, %for.body ] %1 = phi i32 [ %a, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds i32, i32* %v, i64 %0 - %3 = load i32, i32* %2, align 4 + %2 = getelementptr inbounds i32, ptr %v, i64 %0 + %3 = load i32, ptr %2, align 4 %4 = icmp eq i32 %3, 3 %5 = select i1 %4, i32 %1, i32 %b %6 = add nuw nsw i64 %0, 1 @@ -98,7 +98,7 @@ exit: ; preds = %for.body ret i32 %5 } -define i32 @select_const_i32_from_fcmp(float* nocapture readonly %v, i64 %n) #0 { +define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_const_i32_from_fcmp ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 2, i32 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] @@ -118,8 +118,8 @@ entry: for.body: ; preds = %entry, %for.body %0 = phi i64 [ 0, %entry ], [ %6, %for.body ] %1 = phi i32 [ 2, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds float, float* %v, i64 %0 - %3 = load float, float* %2, align 4 + %2 = getelementptr inbounds float, ptr %v, i64 %0 + %3 = load float, ptr %2, align 4 %4 = fcmp fast ueq float %3, 3.0 %5 = select i1 %4, i32 %1, i32 1 %6 = add nuw nsw i64 %0, 1 @@ -130,7 +130,7 @@ exit: ; preds = %for.body ret i32 %5 } -define float @select_const_f32_from_icmp(i32* nocapture readonly %v, i64 %n) #0 { +define float @select_const_f32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_const_f32_from_icmp ; CHECK-VF4IC1-NOT: vector.body ; CHECK-VF4IC4-LABEL: @select_const_f32_from_icmp @@ -141,8 +141,8 @@ entry: for.body: ; preds = %entry, %for.body %0 = phi 
i64 [ 0, %entry ], [ %6, %for.body ] %1 = phi fast float [ 3.0, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds i32, i32* %v, i64 %0 - %3 = load i32, i32* %2, align 4 + %2 = getelementptr inbounds i32, ptr %v, i64 %0 + %3 = load i32, ptr %2, align 4 %4 = icmp eq i32 %3, 3 %5 = select fast i1 %4, float %1, float 7.0 %6 = add nuw nsw i64 %0, 1 @@ -153,13 +153,13 @@ exit: ; preds = %for.body ret float %5 } -define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src1, i32* noalias nocapture readonly %src2, i64 %n) #0 { +define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @pred_select_const_i32_from_icmp ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load ; CHECK-VF4IC1: [[MASK:%.*]] = icmp sgt [[VEC_LOAD]], shufflevector ( insertelement ( poison, i32 35, i32 0), poison, zeroinitializer) -; CHECK-VF4IC1: [[MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* {{%.*}}, i32 4, [[MASK]], poison) +; CHECK-VF4IC1: [[MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr {{%.*}}, i32 4, [[MASK]], poison) ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq [[MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 2, i32 0), poison, zeroinitializer) ; CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = select [[VEC_ICMP]], shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer), [[VEC_PHI]] ; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select [[MASK]], [[VEC_SEL_TMP]], [[VEC_PHI]] @@ -176,14 +176,14 @@ entry: for.body: ; preds = %entry, %for.inc %i.013 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] %r.012 = phi i32 [ %r.1, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %src1, i64 %i.013 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %src1, i64 %i.013 + %0 = load i32, ptr %arrayidx, align 4 %cmp1 = icmp sgt i32 %0, 35 br i1 %cmp1, label %if.then, label %for.inc if.then: ; preds = %for.body - %arrayidx2 = getelementptr inbounds i32, i32* %src2, i64 %i.013 - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %src2, i64 %i.013 + %1 = load i32, ptr %arrayidx2, align 4 %cmp3 = icmp eq i32 %1, 2 %spec.select = select i1 %cmp3, i32 1, i32 %r.012 br label %for.inc diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll index 1ce996c..36dc30d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll @@ -12,15 +12,15 @@ target triple="aarch64-unknown-linux-gnu" ; CHECK-CPU-NEOVERSE-N2: Found an estimated cost of 4 for VF vscale x 2 For instruction: %add = fadd float %0, %sum.07 ; CHECK-CPU-NEOVERSE-N2: Found an estimated cost of 8 for VF vscale x 4 For instruction: %add = fadd float %0, %sum.07 -define float @fadd_strict32(float* noalias nocapture readonly %a, i64 %n) #0 { +define float @fadd_strict32(ptr noalias nocapture readonly %a, i64 %n) #0 { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + 
%0 = load float, ptr %arrayidx, align 4 %add = fadd float %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -34,15 +34,15 @@ for.end: ; CHECK: Found an estimated cost of 8 for VF vscale x 2 For instruction: %add = fadd double %0, %sum.07 ; CHECK-CPU-NEOVERSE-N2: Found an estimated cost of 4 for VF vscale x 2 For instruction: %add = fadd double %0, %sum.07 -define double @fadd_strict64(double* noalias nocapture readonly %a, i64 %n) #0 { +define double @fadd_strict64(ptr noalias nocapture readonly %a, i64 %n) #0 { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds double, double* %a, i64 %iv - %0 = load double, double* %arrayidx, align 4 + %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv + %0 = load double, ptr %arrayidx, align 4 %add = fadd double %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll index aa8ed299..bd3f222 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll @@ -3,7 +3,7 @@ target triple = "aarch64-unknown-linux-gnu" -define void @f16_to_f32(float* noalias nocapture %dst, half* noalias nocapture readonly %src, i64 %N) #0 { +define void @f16_to_f32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @f16_to_f32( ; CHECK: vector.body ; CHECK: %{{.*}} = fpext %{{.*}} to @@ -12,11 +12,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds half, half* %src, i64 %i.07 - %0 = load half, half* %arrayidx, align 2 + %arrayidx = getelementptr inbounds half, ptr %src, i64 %i.07 + %0 = load half, ptr %arrayidx, align 2 %conv = fpext half %0 to float - %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07 - store float %conv, float* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07 + store float %conv, ptr %arrayidx1, align 4 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -26,7 +26,7 @@ for.end: ; preds = %for.body, %entry } -define void @f64_to_f32(float* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %N) #0 { +define void @f64_to_f32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @f64_to_f32( ; CHECK: vector.body ; CHECK: %{{.*}} = fptrunc %{{.*}} to @@ -35,11 +35,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds double, double* %src, i64 %i.07 - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %src, i64 %i.07 + %0 = load double, ptr %arrayidx, align 8 %conv = fptrunc double %0 to float - %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07 - store float %conv, float* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07 + store float %conv, ptr %arrayidx1, align 4 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -49,7 +49,7 @@ for.end: ; preds = %for.body, 
%entry } -define void @f16_to_s8(i8* noalias nocapture %dst, half* noalias nocapture readonly %src, i64 %N) #0 { +define void @f16_to_s8(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @f16_to_s8( ; CHECK: vector.body ; CHECK: %{{.*}} = fptosi %{{.*}} to @@ -58,11 +58,11 @@ entry: for.body: ; preds = %entry, %for.body %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds half, half* %src, i64 %i.08 - %0 = load half, half* %arrayidx, align 2 + %arrayidx = getelementptr inbounds half, ptr %src, i64 %i.08 + %0 = load half, ptr %arrayidx, align 2 %conv1 = fptosi half %0 to i8 - %arrayidx2 = getelementptr inbounds i8, i8* %dst, i64 %i.08 - store i8 %conv1, i8* %arrayidx2, align 1 + %arrayidx2 = getelementptr inbounds i8, ptr %dst, i64 %i.08 + store i8 %conv1, ptr %arrayidx2, align 1 %inc = add nuw nsw i64 %i.08, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -72,7 +72,7 @@ for.end: ; preds = %for.body, %entry } -define void @f32_to_u64(i64* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %N) #0 { +define void @f32_to_u64(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @f32_to_u64( ; CHECK: vector.body ; CHECK: %{{.*}} = fptoui %{{.*}} to @@ -81,11 +81,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, float* %src, i64 %i.07 - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %src, i64 %i.07 + %0 = load float, ptr %arrayidx, align 4 %conv = fptoui float %0 to i64 - %arrayidx1 = getelementptr inbounds i64, i64* %dst, i64 %i.07 - store i64 %conv, i64* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds i64, ptr %dst, i64 %i.07 + store i64 %conv, ptr %arrayidx1, align 8 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -95,7 +95,7 @@ for.end: ; preds = %for.body, %entry } -define void @s8_to_f32(float* noalias nocapture %dst, i8* noalias nocapture readonly %src, i64 %N) #0 { +define void @s8_to_f32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @s8_to_f32( ; CHECK: vector.body ; CHECK: %{{.*}} = sitofp %{{.*}} to @@ -104,11 +104,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %src, i64 %i.07 - %0 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.07 + %0 = load i8, ptr %arrayidx, align 1 %conv = sitofp i8 %0 to float - %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07 - store float %conv, float* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07 + store float %conv, ptr %arrayidx1, align 4 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -118,7 +118,7 @@ for.end: ; preds = %for.body, %entry } -define void @u16_to_f32(float* noalias nocapture %dst, i16* noalias nocapture readonly %src, i64 %N) #0 { +define void @u16_to_f32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @u16_to_f32( ; CHECK: vector.body ; CHECK: %{{.*}} = uitofp %{{.*}} to @@ -127,11 +127,11 @@ entry: for.body: ; preds = %entry, 
%for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i16, i16* %src, i64 %i.07 - %0 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %src, i64 %i.07 + %0 = load i16, ptr %arrayidx, align 2 %conv = uitofp i16 %0 to float - %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07 - store float %conv, float* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07 + store float %conv, ptr %arrayidx1, align 4 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -141,7 +141,7 @@ for.end: ; preds = %for.body, %entry } -define void @u64_to_f16(half* noalias nocapture %dst, i64* noalias nocapture readonly %src, i64 %N) #0 { +define void @u64_to_f16(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @u64_to_f16( ; CHECK: vector.body ; CHECK: %{{.*}} = uitofp %{{.*}} to @@ -150,11 +150,11 @@ entry: for.body: ; preds = %entry, %for.body %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %src, i64 %i.08 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.08 + %0 = load i64, ptr %arrayidx, align 8 %conv1 = uitofp i64 %0 to half - %arrayidx2 = getelementptr inbounds half, half* %dst, i64 %i.08 - store half %conv1, half* %arrayidx2, align 2 + %arrayidx2 = getelementptr inbounds half, ptr %dst, i64 %i.08 + store half %conv1, ptr %arrayidx2, align 2 %inc = add nuw nsw i64 %i.08, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -164,7 +164,7 @@ for.end: ; preds = %for.body, %entry } -define void @s64_to_f16(half* noalias nocapture %dst, i64* noalias nocapture readonly %src, i64 %N) #0 { +define void @s64_to_f16(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @s64_to_f16( ; CHECK: vector.body ; CHECK: %{{.*}} = sitofp %{{.*}} to @@ -173,11 +173,11 @@ entry: for.body: ; preds = %entry, %for.body %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %src, i64 %i.08 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.08 + %0 = load i64, ptr %arrayidx, align 8 %conv1 = sitofp i64 %0 to half - %arrayidx2 = getelementptr inbounds half, half* %dst, i64 %i.08 - store half %conv1, half* %arrayidx2, align 2 + %arrayidx2 = getelementptr inbounds half, ptr %dst, i64 %i.08 + store half %conv1, ptr %arrayidx2, align 2 %inc = add nuw nsw i64 %i.08, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -187,7 +187,7 @@ for.end: ; preds = %for.body, %entry } -define void @s8_to_s32(i32* noalias nocapture %dst, i8* noalias nocapture readonly %src, i64 %N) #0 { +define void @s8_to_s32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @s8_to_s32( ; CHECK: vector.body ; CHECK: %{{.*}} = sext %{{.*}} to @@ -196,11 +196,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %src, i64 %i.07 - %0 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.07 + %0 = load i8, ptr %arrayidx, align 1 %conv = sext i8 %0 to i32 - %arrayidx1 = getelementptr inbounds i32, i32* %dst, i64 %i.07 - 
store i32 %conv, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %dst, i64 %i.07 + store i32 %conv, ptr %arrayidx1, align 4 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -210,7 +210,7 @@ for.end: ; preds = %for.body, %entry } -define void @u8_to_u16(i16* noalias nocapture %dst, i8* noalias nocapture readonly %src, i64 %N) #0 { +define void @u8_to_u16(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @u8_to_u16( ; CHECK: vector.body ; CHECK: %{{.*}} = zext %{{.*}} to @@ -219,11 +219,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %src, i64 %i.07 - %0 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.07 + %0 = load i8, ptr %arrayidx, align 1 %conv = zext i8 %0 to i16 - %arrayidx1 = getelementptr inbounds i16, i16* %dst, i64 %i.07 - store i16 %conv, i16* %arrayidx1, align 2 + %arrayidx1 = getelementptr inbounds i16, ptr %dst, i64 %i.07 + store i16 %conv, ptr %arrayidx1, align 2 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -233,7 +233,7 @@ for.end: ; preds = %for.body, %entry } -define void @s64_to_s8(i8* noalias nocapture %dst, i64* noalias nocapture readonly %src, i64 %N) #0 { +define void @s64_to_s8(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @s64_to_s8( ; CHECK: vector.body ; CHECK: %{{.*}} = trunc %{{.*}} to @@ -242,11 +242,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %src, i64 %i.07 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.07 + %0 = load i64, ptr %arrayidx, align 8 %conv = trunc i64 %0 to i8 - %arrayidx1 = getelementptr inbounds i8, i8* %dst, i64 %i.07 - store i8 %conv, i8* %arrayidx1, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.07 + store i8 %conv, ptr %arrayidx1, align 1 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll index a40ff86..34e2f34 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll @@ -16,13 +16,13 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" -define void @vector_reverse_mask_nxv4i1(double* %a, double* %cond, i64 %N) #0 { +define void @vector_reverse_mask_nxv4i1(ptr %a, ptr %cond, i64 %N) #0 { ; CHECK-LABEL: vector.body: ; CHECK: %[[REVERSE6:.*]] = call @llvm.experimental.vector.reverse.nxv4i1( %{{.*}}) -; CHECK: %[[WIDEMSKLOAD:.*]] = call @llvm.masked.load.nxv4f64.p0nxv4f64(* %{{.*}}, i32 8, %[[REVERSE6]], poison) +; CHECK: %[[WIDEMSKLOAD:.*]] = call @llvm.masked.load.nxv4f64.p0(ptr %{{.*}}, i32 8, %[[REVERSE6]], poison) ; CHECK-NEXT: %[[FADD:.*]] = fadd %[[WIDEMSKLOAD]] ; CHECK: %[[REVERSE9:.*]] = call @llvm.experimental.vector.reverse.nxv4i1( %{{.*}}) -; CHECK: call void @llvm.masked.store.nxv4f64.p0nxv4f64( %[[FADD]], * %{{.*}}, 
i32 8, %[[REVERSE9]] +; CHECK: call void @llvm.masked.store.nxv4f64.p0( %[[FADD]], ptr %{{.*}}, i32 8, %[[REVERSE9]] entry: %cmp7 = icmp sgt i64 %N, 0 @@ -34,16 +34,16 @@ for.cond.cleanup: ; preds = %for.cond.cleanup, % for.body: ; preds = %for.body, %entry %i.08.in = phi i64 [ %i.08, %for.inc ], [ %N, %entry ] %i.08 = add nsw i64 %i.08.in, -1 - %arrayidx = getelementptr inbounds double, double* %cond, i64 %i.08 - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %cond, i64 %i.08 + %0 = load double, ptr %arrayidx, align 8 %tobool = fcmp une double %0, 0.000000e+00 br i1 %tobool, label %if.then, label %for.inc if.then: ; preds = %for.body - %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.08 - %1 = load double, double* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds double, ptr %a, i64 %i.08 + %1 = load double, ptr %arrayidx1, align 8 %add = fadd double %1, 1.000000e+00 - store double %add, double* %arrayidx1, align 8 + store double %add, ptr %arrayidx1, align 8 br label %for.inc for.inc: ; preds = %for.body, %if.then diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll index cc51c14..003a3f9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -8,7 +8,7 @@ ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -S \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s -define void @vector_reverse_f64(i64 %N, double* noalias %a, double* noalias %b) #0{ +define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-LABEL: @vector_reverse_f64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0 @@ -28,23 +28,21 @@ define void @vector_reverse_f64(i64 %N, double* noalias %a, double* noalias %b) ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[INDEX]], -1 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], [[N]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP8:%.*]] = shl i32 [[TMP7]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = sub i32 1, [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP10]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP11]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = fadd [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 1.000000e+00, i32 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP16:%.*]] = shl i32 [[TMP15]], 3 ; CHECK-NEXT: [[TMP17:%.*]] = sub i32 1, [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* 
[[TMP13]], i64 [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP19]] to * -; CHECK-NEXT: store [[TMP14]], * [[TMP20]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP13]], i64 [[TMP18]] +; CHECK-NEXT: store [[TMP14]], ptr [[TMP19]], align 8 ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP22:%.*]] = shl i64 [[TMP21]], 3 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]] @@ -63,11 +61,11 @@ define void @vector_reverse_f64(i64 %N, double* noalias %a, double* noalias %b) ; CHECK: for.body: ; CHECK-NEXT: [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_08]] = add nsw i64 [[I_08_IN]], -1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]] +; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP24]], 1.000000e+00 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[I_08]] -; CHECK-NEXT: store double [[ADD]], double* [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]] +; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; @@ -81,21 +79,21 @@ for.cond.cleanup: ; preds = %for.body for.body: ; preds = %entry, %for.body %i.08.in = phi i64 [ %i.08, %for.body ], [ %N, %entry ] %i.08 = add nsw i64 %i.08.in, -1 - %arrayidx = getelementptr inbounds double, double* %b, i64 %i.08 - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %b, i64 %i.08 + %0 = load double, ptr %arrayidx, align 8 %add = fadd double %0, 1.000000e+00 - %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.08 - store double %add, double* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds double, ptr %a, i64 %i.08 + store double %add, ptr %arrayidx1, align 8 %cmp = icmp sgt i64 %i.08.in, 1 br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0 } -define void @vector_reverse_i64(i64 %N, i64* %a, i64* %b) #0 { +define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-LABEL: @vector_reverse_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A2:%.*]] = ptrtoint i64* [[A:%.*]] to i64 -; CHECK-NEXT: [[B1:%.*]] = ptrtoint i64* [[B:%.*]] to i64 +; CHECK-NEXT: [[A2:%.*]] = ptrtoint ptr [[A:%.*]] to i64 +; CHECK-NEXT: [[B1:%.*]] = ptrtoint ptr [[B:%.*]] to i64 ; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i64 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: @@ -122,23 +120,21 @@ define void @vector_reverse_i64(i64 %N, i64* %a, i64* %b) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[INDEX]], -1 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], [[N]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP14:%.*]] = shl i32 [[TMP13]], 3 
; CHECK-NEXT: [[TMP15:%.*]] = sub i32 1, [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, i64* [[TMP12]], i64 [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64* [[TMP17]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP18]], align 8 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP17]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP22:%.*]] = shl i32 [[TMP21]], 3 ; CHECK-NEXT: [[TMP23:%.*]] = sub i32 1, [[TMP22]] ; CHECK-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, i64* [[TMP19]], i64 [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = bitcast i64* [[TMP25]] to * -; CHECK-NEXT: store [[TMP20]], * [[TMP26]], align 8 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i64 [[TMP24]] +; CHECK-NEXT: store [[TMP20]], ptr [[TMP25]], align 8 ; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP28:%.*]] = shl i64 [[TMP27]], 3 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP28]] @@ -157,11 +153,11 @@ define void @vector_reverse_i64(i64 %N, i64* %a, i64* %b) #0 { ; CHECK: for.body: ; CHECK-NEXT: [[I_09_IN:%.*]] = phi i64 [ [[I_09:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_09]] = add nsw i64 [[I_09_IN]], -1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[I_09]] -; CHECK-NEXT: [[TMP30:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_09]] +; CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP30]], 1 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I_09]] -; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_09]] +; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX2]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_09_IN]], 1 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP6:![0-9]+]] ; @@ -175,11 +171,11 @@ for.cond.cleanup: ; preds = %for.body for.body: ; preds = %entry, %for.body %i.09.in = phi i64 [ %i.09, %for.body ], [ %N, %entry ] %i.09 = add nsw i64 %i.09.in, -1 - %arrayidx = getelementptr inbounds i64, i64* %b, i64 %i.09 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %b, i64 %i.09 + %0 = load i64, ptr %arrayidx, align 8 %add = add i64 %0, 1 - %arrayidx2 = getelementptr inbounds i64, i64* %a, i64 %i.09 - store i64 %add, i64* %arrayidx2, align 8 + %arrayidx2 = getelementptr inbounds i64, ptr %a, i64 %i.09 + store i64 %add, ptr %arrayidx2, align 8 %cmp = icmp sgt i64 %i.09.in, 1 br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0 } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll index 3622e35..d9acd499 100644 --- 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll @@ -2,7 +2,7 @@ target triple = "aarch64-unknown-linux-gnu" -define void @widen_extractvalue(i64* %dst, {i64, i64} %sv) #0 { +define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 { ; CHECK-LABEL: @widen_extractvalue( ; CHECK: vector.body: ; CHECK: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0 @@ -19,9 +19,9 @@ loop.body: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ] %a = extractvalue { i64, i64 } %sv, 0 %b = extractvalue { i64, i64 } %sv, 1 - %addr = getelementptr i64, i64* %dst, i32 %iv + %addr = getelementptr i64, ptr %dst, i32 %iv %add = add i64 %a, %b - store i64 %add, i64* %addr + store i64 %add, ptr %addr %iv.next = add nsw i32 %iv, 1 %cond = icmp ne i32 %iv.next, 0 br i1 %cond, label %loop.body, label %exit, !llvm.loop !0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll index 837d4cc..7272a69 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" ; CHECK-LABEL: test0 -define void @test0(i16* noalias %M3) { +define void @test0(ptr noalias %M3) { entry: br label %if.then1165.us @@ -14,8 +14,8 @@ if.then1165.us: ; preds = %if.then1165.us, %en %add1178.us = add nsw i32 %conv1177.us, undef %conv1179.us = trunc i32 %add1178.us to i16 %idxprom1181.us = ashr exact i64 undef, 32 - %arrayidx1185.us = getelementptr inbounds i16, i16* %M3, i64 %idxprom1181.us - store i16 %conv1179.us, i16* %arrayidx1185.us, align 2 + %arrayidx1185.us = getelementptr inbounds i16, ptr %M3, i64 %idxprom1181.us + store i16 %conv1179.us, ptr %arrayidx1185.us, align 2 %indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1 %exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16 br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us @@ -25,19 +25,19 @@ for.inc1286.loopexit: ; preds = %if.then1165.us } ; CHECK-LABEL: test1 -define void @test1(i16* noalias %M3) { +define void @test1(ptr noalias %M3) { entry: br label %if.then1165.us if.then1165.us: ; preds = %if.then1165.us, %entry %indvars.iv1783 = phi i64 [ 0, %entry ], [ %indvars.iv.next1784, %if.then1165.us ] - %fptr = load i32, i32* undef, align 4 + %fptr = load i32, ptr undef, align 4 %conv1177.us = zext i16 undef to i32 %add1178.us = add nsw i32 %conv1177.us, %fptr %conv1179.us = trunc i32 %add1178.us to i16 %idxprom1181.us = ashr exact i64 undef, 32 - %arrayidx1185.us = getelementptr inbounds i16, i16* %M3, i64 %idxprom1181.us - store i16 %conv1179.us, i16* %arrayidx1185.us, align 2 + %arrayidx1185.us = getelementptr inbounds i16, ptr %M3, i64 %idxprom1181.us + store i16 %conv1179.us, ptr %arrayidx1185.us, align 2 %indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1 %exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16 br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/unsafe-vf-hint-remark.ll b/llvm/test/Transforms/LoopVectorize/AArch64/unsafe-vf-hint-remark.ll index fb12e9c..9b635bf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/unsafe-vf-hint-remark.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/unsafe-vf-hint-remark.ll @@ -9,24 +9,24 @@ ; CHECK: LV: Selecting 
VF: 2. ; CHECK-LABEL: @test ; CHECK: <2 x i64> -define void @test(i64* nocapture %a, i64* nocapture readonly %b) { +define void @test(ptr nocapture %a, ptr nocapture readonly %b) { entry: br label %loop.header loop.header: %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv - %0 = load i64, i64* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 %iv - %1 = load i64, i64* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv + %0 = load i64, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i64, ptr %b, i64 %iv + %1 = load i64, ptr %arrayidx2, align 4 %add = add nsw i64 %1, %0 %2 = add nuw nsw i64 %iv, 16 - %arrayidx5 = getelementptr inbounds i64, i64* %a, i64 %2 + %arrayidx5 = getelementptr inbounds i64, ptr %a, i64 %2 %c = icmp eq i64 %1, 120 br i1 %c, label %then, label %latch then: - store i64 %add, i64* %arrayidx5, align 4 + store i64 %add, ptr %arrayidx5, align 4 br label %latch latch: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll index 6cfdafe..c1e27ca 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "arm64-apple-darwin" declare float @expf(float) nounwind readnone -define void @expf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @expf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @expf_v4f32( ; CHECK: call <4 x float> @_simd_exp_f4( ; CHECK: ret void @@ -14,11 +14,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @expf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -28,7 +28,7 @@ for.end: } declare double @exp(double) nounwind readnone -define void @exp_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @exp_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @exp_v2f64( ; CHECK: call <2 x double> @_simd_exp_d2( ; CHECK: ret void @@ -38,11 +38,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @exp(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -52,7 +52,7 @@ for.end: } declare float @acosf(float) nounwind readnone -define void @acos_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void 
@acos_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @acos_v4f32( ; CHECK: call <4 x float> @_simd_acos_f4( ; CHECK: ret void @@ -62,11 +62,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @acosf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -76,7 +76,7 @@ for.end: } declare double @acos(double) nounwind readnone -define void @acos_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @acos_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @acos_v2f64( ; CHECK: call <2 x double> @_simd_acos_d2( ; CHECK: ret void @@ -86,11 +86,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @acos(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -100,7 +100,7 @@ for.end: } declare float @asinf(float) nounwind readnone -define void @asinf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @asinf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @asinf_v4f32( ; CHECK: call <4 x float> @_simd_asin_f4( ; CHECK: ret void @@ -110,11 +110,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @asinf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -124,7 +124,7 @@ for.end: } declare double @asin(double) nounwind readnone -define void @asin_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @asin_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @asin_v2f64( ; CHECK: call <2 x double> @_simd_asin_d2( ; CHECK: ret void @@ -134,11 +134,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @asin(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr 
%gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -148,7 +148,7 @@ for.end: } declare float @atanf(float) nounwind readnone -define void @atanf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @atanf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @atanf_v4f32( ; CHECK: call <4 x float> @_simd_atan_f4( ; CHECK: ret void @@ -158,11 +158,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @atanf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -172,7 +172,7 @@ for.end: } declare double @atan(double) nounwind readnone -define void @atan_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @atan_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @atan_v2f64( ; CHECK: call <2 x double> @_simd_atan_d2( ; CHECK: ret void @@ -182,11 +182,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @atan(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -196,7 +196,7 @@ for.end: } declare float @atan2f(float) nounwind readnone -define void @atan2f_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @atan2f_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @atan2f_v4f32( ; CHECK: call <4 x float> @_simd_atan2_f4( ; CHECK: ret void @@ -206,11 +206,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @atan2f(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -220,7 +220,7 @@ for.end: } declare double @atan2(double) nounwind readnone -define void @atan2_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @atan2_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @atan2_v2f64( ; CHECK: call <2 x double> @_simd_atan2_d2( ; CHECK: ret void @@ -230,11 +230,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = 
getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @atan2(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -244,7 +244,7 @@ for.end: } declare float @cosf(float) nounwind readnone -define void @cosf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @cosf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @cosf_v4f32( ; CHECK: call <4 x float> @_simd_cos_f4( ; CHECK: ret void @@ -254,11 +254,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @cosf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -268,7 +268,7 @@ for.end: } declare double @cos(double) nounwind readnone -define void @cos_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @cos_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @cos_v2f64( ; CHECK: call <2 x double> @_simd_cos_d2( ; CHECK: ret void @@ -278,11 +278,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @cos(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -292,7 +292,7 @@ for.end: } declare float @cbrtf(float) nounwind readnone -define void @cbrtf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @cbrtf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @cbrtf_v4f32( ; CHECK: call <4 x float> @_simd_cbrt_f4( ; CHECK: ret void @@ -302,11 +302,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @cbrtf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -316,7 +316,7 @@ for.end: } declare double @cbrt(double) nounwind readnone -define void @cbrt_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @cbrt_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; 
CHECK-LABEL: @cbrt_v2f64( ; CHECK: call <2 x double> @_simd_cbrt_d2( ; CHECK: ret void @@ -326,11 +326,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @cbrt(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -340,7 +340,7 @@ for.end: } declare float @erff(float) nounwind readnone -define void @erff_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @erff_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @erff_v4f32( ; CHECK: call <4 x float> @_simd_erf_f4( ; CHECK: ret void @@ -350,11 +350,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @erff(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -364,7 +364,7 @@ for.end: } declare double @erf(double) nounwind readnone -define void @erf_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @erf_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @erf_v2f64( ; CHECK: call <2 x double> @_simd_erf_d2( ; CHECK: ret void @@ -374,11 +374,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @erf(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -388,7 +388,7 @@ for.end: } declare float @powf(float) nounwind readnone -define void @powf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @powf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @powf_v4f32( ; CHECK: call <4 x float> @_simd_pow_f4( ; CHECK: ret void @@ -398,11 +398,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @powf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 
%iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -412,7 +412,7 @@ for.end: } declare double @pow(double) nounwind readnone -define void @pow_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @pow_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @pow_v2f64( ; CHECK: call <2 x double> @_simd_pow_d2( ; CHECK: ret void @@ -422,11 +422,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @pow(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -436,7 +436,7 @@ for.end: } declare float @sinhf(float) nounwind readnone -define void @sinhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @sinhf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @sinhf_v4f32( ; CHECK: call <4 x float> @_simd_sinh_f4( ; CHECK: ret void @@ -446,11 +446,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @sinhf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -460,7 +460,7 @@ for.end: } declare double @sinh(double) nounwind readnone -define void @sinh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @sinh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @sinh_v2f64( ; CHECK: call <2 x double> @_simd_sinh_d2( ; CHECK: ret void @@ -470,11 +470,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @sinh(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -484,7 +484,7 @@ for.end: } declare float @coshf(float) nounwind readnone -define void @coshf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @coshf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @coshf_v4f32( ; CHECK: call <4 x float> @_simd_cosh_f4( ; CHECK: ret void @@ -494,11 +494,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 
%call = tail call float @coshf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -508,7 +508,7 @@ for.end: } declare double @cosh(double) nounwind readnone -define void @cosh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @cosh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @cosh_v2f64( ; CHECK: call <2 x double> @_simd_cosh_d2( ; CHECK: ret void @@ -518,11 +518,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @cosh(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -532,7 +532,7 @@ for.end: } declare float @tanhf(float) nounwind readnone -define void @tanhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @tanhf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @tanhf_v4f32( ; CHECK: call <4 x float> @_simd_tanh_f4( ; CHECK: ret void @@ -542,11 +542,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @tanhf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -556,7 +556,7 @@ for.end: } declare double @tanh(double) nounwind readnone -define void @tanh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @tanh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @tanh_v2f64( ; CHECK: call <2 x double> @_simd_tanh_d2( ; CHECK: ret void @@ -566,11 +566,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @tanh(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -580,7 +580,7 @@ for.end: } declare float @asinhf(float) nounwind readnone -define void @asinhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @asinhf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @asinhf_v4f32( ; CHECK: call <4 x float> @_simd_asinh_f4( ; 
CHECK: ret void @@ -590,11 +590,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @asinhf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -604,7 +604,7 @@ for.end: } declare double @asinh(double) nounwind readnone -define void @asinh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @asinh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @asinh_v2f64( ; CHECK: call <2 x double> @_simd_asinh_d2( ; CHECK: ret void @@ -614,11 +614,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @asinh(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -628,7 +628,7 @@ for.end: } declare float @acoshf(float) nounwind readnone -define void @acoshf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @acoshf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @acoshf_v4f32( ; CHECK: call <4 x float> @_simd_acosh_f4( ; CHECK: ret void @@ -638,11 +638,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @acoshf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -652,7 +652,7 @@ for.end: } declare double @acosh(double) nounwind readnone -define void @acosh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @acosh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @acosh_v2f64( ; CHECK: call <2 x double> @_simd_acosh_d2( ; CHECK: ret void @@ -662,11 +662,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @acosh(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label 
%for.end, label %for.body @@ -676,7 +676,7 @@ for.end: } declare float @atanhf(float) nounwind readnone -define void @atanhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @atanhf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @atanhf_v4f32( ; CHECK: call <4 x float> @_simd_atanh_f4( ; CHECK: ret void @@ -686,11 +686,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @atanhf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -700,7 +700,7 @@ for.end: } declare double @atanh(double) nounwind readnone -define void @atanh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @atanh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @atanh_v2f64( ; CHECK: call <2 x double> @_simd_atanh_d2( ; CHECK: ret void @@ -710,11 +710,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @atanh(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll index 26511c1..ab50b32 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll @@ -17,7 +17,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" -define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond, i64 %N) #0 { +define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-LABEL: @vector_reverse_mask_v4i1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0 @@ -33,34 +33,28 @@ define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond, ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = xor i64 [[INDEX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[N]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, double* [[COND:%.*]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -3 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 -3 +; 
CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP3]], align 8
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>*
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 -4
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i64 -3
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP6]], align 8
 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[TMP8:%.*]] = fcmp une <4 x double> [[REVERSE]], zeroinitializer
 ; CHECK-NEXT: [[TMP9:%.*]] = fcmp une <4 x double> [[REVERSE2]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, double* [[TMP10]], i64 -3
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i64 -3
 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP12]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP10]], i64 -4
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP13]], i64 -3
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP11]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 -4
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP13]], i64 -3
 ; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
-; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
+; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP14]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
 ; CHECK-NEXT: [[TMP16:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
 ; CHECK-NEXT: [[TMP17:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
-; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP16]], <4 x double>* [[TMP18]], i32 8, <4 x i1> [[REVERSE3]])
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
-; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP17]], <4 x double>* [[TMP19]], i32 8, <4 x i1> [[REVERSE5]])
+; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP16]], ptr [[TMP11]], i32 8, <4 x i1> [[REVERSE3]])
+; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP17]], ptr [[TMP14]], i32 8, <4 x i1> [[REVERSE5]])
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -77,15 +71,15 @@ define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond, ; CHECK: for.body: ; CHECK-NEXT: [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_08]] = add nsw i64 [[I_08_IN]], -1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[COND]], i64 [[I_08]] -; CHECK-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[COND]], i64 [[I_08]] +; CHECK-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une double [[TMP21]], 0.000000e+00 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[I_08]] -; CHECK-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]] +; CHECK-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP22]], 1.000000e+00 -; CHECK-NEXT: store double [[ADD]], double* [[ARRAYIDX1]], align 8 +; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1 @@ -102,16 +96,16 @@ for.cond.cleanup: ; preds = %for.cond.cleanup, % for.body: ; preds = %for.body, %entry %i.08.in = phi i64 [ %i.08, %for.inc ], [ %N, %entry ] %i.08 = add nsw i64 %i.08.in, -1 - %arrayidx = getelementptr inbounds double, double* %cond, i64 %i.08 - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %cond, i64 %i.08 + %0 = load double, ptr %arrayidx, align 8 %tobool = fcmp une double %0, 0.000000e+00 br i1 %tobool, label %if.then, label %for.inc if.then: ; preds = %for.body - %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.08 - %1 = load double, double* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds double, ptr %a, i64 %i.08 + %1 = load double, ptr %arrayidx1, align 8 %add = fadd double %1, 1.000000e+00 - store double %add, double* %arrayidx1, align 8 + store double %add, ptr %arrayidx1, align 8 br label %for.inc for.inc: ; preds = %for.body, %if.then diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/divergent-runtime-check.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/divergent-runtime-check.ll index cf7247f..b1f2ccd 100644 --- a/llvm/test/Transforms/LoopVectorize/AMDGPU/divergent-runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/divergent-runtime-check.ll @@ -6,17 +6,17 @@ ; GCN-NOT: store <2 x half> ; REMARK: remark: :0:0: loop not vectorized: runtime pointer checks needed. 
Not enabled for divergent target -define amdgpu_kernel void @runtime_check_divergent_target(half addrspace(1)* nocapture %a, half addrspace(1)* nocapture %b) #0 { +define amdgpu_kernel void @runtime_check_divergent_target(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture %b) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds half, half addrspace(1)* %b, i64 %indvars.iv - %load = load half, half addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds half, ptr addrspace(1) %b, i64 %indvars.iv + %load = load half, ptr addrspace(1) %arrayidx, align 4 %mul = fmul half %load, 3.0 - %arrayidx2 = getelementptr inbounds half, half addrspace(1)* %a, i64 %indvars.iv - store half %mul, half addrspace(1)* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds half, ptr addrspace(1) %a, i64 %indvars.iv + store half %mul, ptr addrspace(1) %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 1024 diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-fp32.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-fp32.ll index 26da04e..e43a14b 100644 --- a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-fp32.ll +++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-fp32.ll @@ -4,15 +4,15 @@ ; GFX90A-COUNT-2: load <2 x float> ; GFX90A-COUNT-2: fadd fast <2 x float> -define float @vectorize_v2f32_loop(float addrspace(1)* noalias %s) { +define float @vectorize_v2f32_loop(ptr addrspace(1) noalias %s) { entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %q.04 = phi float [ 0.0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds float, float addrspace(1)* %s, i64 %indvars.iv - %load = load float, float addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %s, i64 %indvars.iv + %load = load float, ptr addrspace(1) %arrayidx, align 4 %add = fadd fast float %q.04, %load %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 256 diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll index 626e2f5..b29abbd 100644 --- a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll +++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll @@ -3,7 +3,7 @@ ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s -passes=loop-vectorize,dce,instcombine -S | FileCheck -check-prefix=VI %s ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s -passes=loop-vectorize,dce,instcombine -S | FileCheck -check-prefix=CI %s -define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) { +define half @vectorize_v2f16_loop(ptr addrspace(1) noalias %s) { ; GFX9-LABEL: @vectorize_v2f16_loop( ; GFX9-NEXT: entry: ; GFX9-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -13,12 +13,10 @@ define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) { ; GFX9-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; GFX9-NEXT: [[VEC_PHI:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; GFX9-NEXT: [[VEC_PHI1:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; GFX9-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, half addrspace(1)* 
[[S:%.*]], i64 [[INDEX]] -; GFX9-NEXT: [[TMP1:%.*]] = bitcast half addrspace(1)* [[TMP0]] to <2 x half> addrspace(1)* -; GFX9-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP1]], align 2 -; GFX9-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, half addrspace(1)* [[TMP0]], i64 2 -; GFX9-NEXT: [[TMP3:%.*]] = bitcast half addrspace(1)* [[TMP2]] to <2 x half> addrspace(1)* -; GFX9-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP3]], align 2 +; GFX9-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDEX]] +; GFX9-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 2 +; GFX9-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP0]], i64 2 +; GFX9-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP2]], align 2 ; GFX9-NEXT: [[TMP4]] = fadd fast <2 x half> [[VEC_PHI]], [[WIDE_LOAD]] ; GFX9-NEXT: [[TMP5]] = fadd fast <2 x half> [[VEC_PHI1]], [[WIDE_LOAD2]] ; GFX9-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -45,12 +43,10 @@ define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) { ; VI-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VI-NEXT: [[VEC_PHI:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; VI-NEXT: [[VEC_PHI1:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; VI-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, half addrspace(1)* [[S:%.*]], i64 [[INDEX]] -; VI-NEXT: [[TMP1:%.*]] = bitcast half addrspace(1)* [[TMP0]] to <2 x half> addrspace(1)* -; VI-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP1]], align 2 -; VI-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, half addrspace(1)* [[TMP0]], i64 2 -; VI-NEXT: [[TMP3:%.*]] = bitcast half addrspace(1)* [[TMP2]] to <2 x half> addrspace(1)* -; VI-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP3]], align 2 +; VI-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDEX]] +; VI-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 2 +; VI-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP0]], i64 2 +; VI-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP2]], align 2 ; VI-NEXT: [[TMP4]] = fadd fast <2 x half> [[VEC_PHI]], [[WIDE_LOAD]] ; VI-NEXT: [[TMP5]] = fadd fast <2 x half> [[VEC_PHI1]], [[WIDE_LOAD2]] ; VI-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -74,8 +70,8 @@ define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) { ; CI: for.body: ; CI-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CI-NEXT: [[Q_04:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, half addrspace(1)* [[S:%.*]], i64 [[INDVARS_IV]] -; CI-NEXT: [[TMP0:%.*]] = load half, half addrspace(1)* [[ARRAYIDX]], align 2 +; CI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDVARS_IV]] +; CI-NEXT: [[TMP0:%.*]] = load half, ptr addrspace(1) [[ARRAYIDX]], align 2 ; CI-NEXT: [[ADD]] = fadd fast half [[Q_04]], [[TMP0]] ; CI-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CI-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 256 @@ -89,8 +85,8 @@ entry: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %q.04 = phi 
half [ 0.0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds half, half addrspace(1)* %s, i64 %indvars.iv - %0 = load half, half addrspace(1)* %arrayidx, align 2 + %arrayidx = getelementptr inbounds half, ptr addrspace(1) %s, i64 %indvars.iv + %0 = load half, ptr addrspace(1) %arrayidx, align 2 %add = fadd fast half %q.04, %0 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 256 diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll index 528d06f..ac67257 100644 --- a/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll +++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll @@ -7,17 +7,17 @@ ; CHECK: store i32 ; CHECK-NOT: store i32 ; CHECK: ret -define amdgpu_kernel void @small_loop(i32* nocapture %inArray, i32 %size) nounwind { +define amdgpu_kernel void @small_loop(ptr nocapture %inArray, i32 %size) nounwind { entry: %0 = icmp sgt i32 %size, 0 br i1 %0, label %loop, label %exit loop: ; preds = %entry, %loop %iv = phi i32 [ %iv1, %loop ], [ 0, %entry ] - %1 = getelementptr inbounds i32, i32* %inArray, i32 %iv - %2 = load i32, i32* %1, align 4 + %1 = getelementptr inbounds i32, ptr %inArray, i32 %iv + %2 = load i32, ptr %1, align 4 %3 = add nsw i32 %2, 6 - store i32 %3, i32* %1, align 4 + store i32 %3, ptr %1, align 4 %iv1 = add i32 %iv, 1 ; %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %cond = icmp eq i32 %iv1, %size diff --git a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll index 151919f..aee0aa4 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll @@ -16,7 +16,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" ; Integer loops are always vectorizeable ; CHECK: Checking a loop in 'sumi' ; CHECK: We can vectorize this loop! -define void @sumi(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %N) { +define void @sumi(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -26,13 +26,13 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.06 - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %B, i32 %i.06 - %1 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.06 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %B, i32 %i.06 + %1 = load i32, ptr %arrayidx1, align 4 %mul = mul nsw i32 %1, %0 - %arrayidx2 = getelementptr inbounds i32, i32* %C, i32 %i.06 - store i32 %mul, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %C, i32 %i.06 + store i32 %mul, ptr %arrayidx2, align 4 %inc = add nuw nsw i32 %i.06, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -51,7 +51,7 @@ for.end: ; preds = %for.end.loopexit, % ; MVE: We can vectorize this loop! ; DARWIN: Checking a loop in 'sumf' ; DARWIN: We can vectorize this loop! 
-define void @sumf(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) { +define void @sumf(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -61,13 +61,13 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds float, float* %A, i32 %i.06 - %0 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.06 - %1 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.06 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.06 + %1 = load float, ptr %arrayidx1, align 4 %mul = fmul float %0, %1 - %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.06 - store float %mul, float* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %C, i32 %i.06 + store float %mul, ptr %arrayidx2, align 4 %inc = add nuw nsw i32 %i.06, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -82,7 +82,7 @@ for.end: ; preds = %for.end.loopexit, % ; Integer loops are always vectorizeable ; CHECK: Checking a loop in 'redi' ; CHECK: We can vectorize this loop! -define i32 @redi(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) { +define i32 @redi(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -93,10 +93,10 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %Red.06 = phi i32 [ %add, %for.body ], [ undef, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.07 - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %b, i32 %i.07 - %1 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.07 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i32 %i.07 + %1 = load i32, ptr %arrayidx1, align 4 %mul = mul nsw i32 %1, %0 %add = add nsw i32 %mul, %Red.06 %inc = add nuw nsw i32 %i.07, 1 @@ -119,7 +119,7 @@ for.end: ; preds = %for.end.loopexit, % ; MVE: We can vectorize this loop! ; DARWIN: Checking a loop in 'redf' ; DARWIN: We can vectorize this loop! 
-define float @redf(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i32 %N) { +define float @redf(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -130,10 +130,10 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %Red.06 = phi float [ %add, %for.body ], [ undef, %for.body.preheader ] - %arrayidx = getelementptr inbounds float, float* %a, i32 %i.07 - %0 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %b, i32 %i.07 - %1 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i32 %i.07 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %b, i32 %i.07 + %1 = load float, ptr %arrayidx1, align 4 %mul = fmul float %0, %1 %add = fadd float %Red.06, %mul %inc = add nuw nsw i32 %i.07, 1 @@ -154,21 +154,21 @@ for.end: ; preds = %for.end.loopexit, % ; LINUX: Potentially unsafe FP op prevents vectorization ; DARWIN: Checking a loop in 'fabs' ; DARWIN: We can vectorize this loop! -define void @fabs(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) { +define void @fabs(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) { entry: %cmp10 = icmp eq i32 %N, 0 br i1 %cmp10, label %for.end, label %for.body for.body: ; preds = %entry, %for.body %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, float* %A, i32 %i.011 - %0 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.011 - %1 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.011 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.011 + %1 = load float, ptr %arrayidx1, align 4 %fabsf = tail call float @fabsf(float %1) #1 %conv3 = fmul float %0, %fabsf - %arrayidx4 = getelementptr inbounds float, float* %C, i32 %i.011 - store float %conv3, float* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %C, i32 %i.011 + store float %conv3, ptr %arrayidx4, align 4 %inc = add nuw nsw i32 %i.011, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.end, label %for.body @@ -180,7 +180,7 @@ for.end: ; preds = %for.body, %entry ; Integer loops are always vectorizeable ; CHECK: Checking a loop in 'sumi_fast' ; CHECK: We can vectorize this loop! 
-define void @sumi_fast(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %N) { +define void @sumi_fast(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -190,13 +190,13 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.06 - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %B, i32 %i.06 - %1 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.06 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %B, i32 %i.06 + %1 = load i32, ptr %arrayidx1, align 4 %mul = mul nsw i32 %1, %0 - %arrayidx2 = getelementptr inbounds i32, i32* %C, i32 %i.06 - store i32 %mul, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %C, i32 %i.06 + store i32 %mul, ptr %arrayidx2, align 4 %inc = add nuw nsw i32 %i.06, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -211,7 +211,7 @@ for.end: ; preds = %for.end.loopexit, % ; Floating-point loops can be vectorizeable with fast-math ; CHECK: Checking a loop in 'sumf_fast' ; CHECK: We can vectorize this loop! -define void @sumf_fast(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) { +define void @sumf_fast(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -221,13 +221,13 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds float, float* %A, i32 %i.06 - %0 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.06 - %1 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.06 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.06 + %1 = load float, ptr %arrayidx1, align 4 %mul = fmul fast float %1, %0 - %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.06 - store float %mul, float* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %C, i32 %i.06 + store float %mul, ptr %arrayidx2, align 4 %inc = add nuw nsw i32 %i.06, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -242,7 +242,7 @@ for.end: ; preds = %for.end.loopexit, % ; Integer loops are always vectorizeable ; CHECK: Checking a loop in 'redi_fast' ; CHECK: We can vectorize this loop! 
-define i32 @redi_fast(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) { +define i32 @redi_fast(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -253,10 +253,10 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %Red.06 = phi i32 [ %add, %for.body ], [ undef, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.07 - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %b, i32 %i.07 - %1 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.07 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i32 %i.07 + %1 = load i32, ptr %arrayidx1, align 4 %mul = mul nsw i32 %1, %0 %add = add nsw i32 %mul, %Red.06 %inc = add nuw nsw i32 %i.07, 1 @@ -275,7 +275,7 @@ for.end: ; preds = %for.end.loopexit, % ; Floating-point loops can be vectorizeable with fast-math ; CHECK: Checking a loop in 'redf_fast' ; CHECK: We can vectorize this loop! -define float @redf_fast(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i32 %N) { +define float @redf_fast(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -286,10 +286,10 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %Red.06 = phi float [ %add, %for.body ], [ undef, %for.body.preheader ] - %arrayidx = getelementptr inbounds float, float* %a, i32 %i.07 - %0 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %b, i32 %i.07 - %1 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i32 %i.07 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %b, i32 %i.07 + %1 = load float, ptr %arrayidx1, align 4 %mul = fmul fast float %1, %0 %add = fadd fast float %mul, %Red.06 %inc = add nuw nsw i32 %i.07, 1 @@ -308,21 +308,21 @@ for.end: ; preds = %for.end.loopexit, % ; Make sure calls that turn into builtins are also covered ; CHECK: Checking a loop in 'fabs_fast' ; CHECK: We can vectorize this loop! 
-define void @fabs_fast(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) { +define void @fabs_fast(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) { entry: %cmp10 = icmp eq i32 %N, 0 br i1 %cmp10, label %for.end, label %for.body for.body: ; preds = %entry, %for.body %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, float* %A, i32 %i.011 - %0 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.011 - %1 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.011 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.011 + %1 = load float, ptr %arrayidx1, align 4 %fabsf = tail call fast float @fabsf(float %1) #2 %conv3 = fmul fast float %fabsf, %0 - %arrayidx4 = getelementptr inbounds float, float* %C, i32 %i.011 - store float %conv3, float* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %C, i32 %i.011 + store float %conv3, ptr %arrayidx4, align 4 %inc = add nuw nsw i32 %i.011, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.end, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll b/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll index b1c400a..9ad9347 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll @@ -13,15 +13,15 @@ target triple = "thumbv7-apple-ios3.0.0" ;SWIFT: load <4 x i32> ;SWIFT: load <4 x i32> ;SWIFT: ret -define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp { +define i32 @foo(ptr nocapture %A, i32 %n) nounwind readonly ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge .lr.ph: ; preds = %0, %.lr.ph %i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ] %sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ] - %2 = getelementptr inbounds i32, i32* %A, i32 %i.02 - %3 = load i32, i32* %2, align 4 + %2 = getelementptr inbounds i32, ptr %A, i32 %i.02 + %3 = load i32, ptr %2, align 4 %4 = add nsw i32 %3, %sum.01 %5 = add nsw i32 %i.02, 1 %exitcond = icmp eq i32 %5, %n @@ -36,7 +36,7 @@ define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp { ;SWIFTUNROLL-LABEL: @register_limit( ;SWIFTUNROLL: load i32 ;SWIFTUNROLL-NOT: load i32 -define i32 @register_limit(i32* nocapture %A, i32 %n) { +define i32 @register_limit(ptr nocapture %A, i32 %n) { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -48,8 +48,8 @@ define i32 @register_limit(i32* nocapture %A, i32 %n) { %sum.04 = phi i32 [ %8, %.lr.ph ], [ 0, %0 ] %sum.05 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] %sum.06 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] - %2 = getelementptr inbounds i32, i32* %A, i32 %i.02 - %3 = load i32, i32* %2, align 4 + %2 = getelementptr inbounds i32, ptr %A, i32 %i.02 + %3 = load i32, ptr %2, align 4 %4 = add nsw i32 %3, %sum.01 %5 = add nsw i32 %i.02, 1 %6 = add nsw i32 %3, %sum.02 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll index 93f736e..5b0a7fa 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll @@ -33,32 +33,32 @@ for.body: %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ] %add = add i32 %v.055, %offset %mul = mul i32 %add, 3 - %arrayidx = getelementptr 
inbounds [1536 x float], [1536 x float]* @src_data, i32 0, i32 %mul - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds [512 x float], [512 x float]* @kernel, i32 0, i32 %v.055 - %1 = load float, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds [1536 x float], ptr @src_data, i32 0, i32 %mul + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds [512 x float], ptr @kernel, i32 0, i32 %v.055 + %1 = load float, ptr %arrayidx2, align 4 %mul3 = fmul fast float %0, %1 - %arrayidx4 = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i32 0, i32 %v.055 - %2 = load float, float* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds [512 x float], ptr @kernel2, i32 0, i32 %v.055 + %2 = load float, ptr %arrayidx4, align 4 %mul5 = fmul fast float %mul3, %2 - %arrayidx6 = getelementptr inbounds [512 x float], [512 x float]* @kernel3, i32 0, i32 %v.055 - %3 = load float, float* %arrayidx6, align 4 + %arrayidx6 = getelementptr inbounds [512 x float], ptr @kernel3, i32 0, i32 %v.055 + %3 = load float, ptr %arrayidx6, align 4 %mul7 = fmul fast float %mul5, %3 - %arrayidx8 = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i32 0, i32 %v.055 - %4 = load float, float* %arrayidx8, align 4 + %arrayidx8 = getelementptr inbounds [512 x float], ptr @kernel4, i32 0, i32 %v.055 + %4 = load float, ptr %arrayidx8, align 4 %mul9 = fmul fast float %mul7, %4 %add10 = fadd fast float %r.057, %mul9 %arrayidx.sum = add i32 %mul, 1 - %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i32 0, i32 %arrayidx.sum - %5 = load float, float* %arrayidx11, align 4 + %arrayidx11 = getelementptr inbounds [1536 x float], ptr @src_data, i32 0, i32 %arrayidx.sum + %5 = load float, ptr %arrayidx11, align 4 %mul13 = fmul fast float %1, %5 %mul15 = fmul fast float %2, %mul13 %mul17 = fmul fast float %3, %mul15 %mul19 = fmul fast float %4, %mul17 %add20 = fadd fast float %g.056, %mul19 %arrayidx.sum52 = add i32 %mul, 2 - %arrayidx21 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i32 0, i32 %arrayidx.sum52 - %6 = load float, float* %arrayidx21, align 4 + %arrayidx21 = getelementptr inbounds [1536 x float], ptr @src_data, i32 0, i32 %arrayidx.sum52 + %6 = load float, ptr %arrayidx21, align 4 %mul23 = fmul fast float %1, %6 %mul25 = fmul fast float %2, %mul23 %mul27 = fmul fast float %3, %mul25 @@ -81,8 +81,8 @@ for.end: %r.0.lcssa = phi i8 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] %g.0.lcssa = phi i8 [ %phitmp60, %for.cond.for.end_crit_edge ], [ 0, %entry ] %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ] - store i8 %r.0.lcssa, i8* @r_, align 4 - store i8 %g.0.lcssa, i8* @g_, align 4 - store i8 %b.0.lcssa, i8* @b_, align 4 + store i8 %r.0.lcssa, ptr @r_, align 4 + store i8 %g.0.lcssa, ptr @g_, align 4 + store i8 %b.0.lcssa, ptr @b_, align 4 ret void } diff --git a/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll index def0f5d..cb8899c 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll @@ -18,13 +18,13 @@ define void @example1() nounwind uwtable ssp { ;