From 7d7577256b76e4293f455b8093504d5f7044ab4b Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Wed, 14 Dec 2022 15:15:18 +0100
Subject: [PATCH] [LoopVectorize] Convert some tests to opaque pointers (NFC)

---
 .../Transforms/LoopVectorize/12-12-11-if-conv.ll | 8 +-
 .../LoopVectorize/2012-10-22-isconsec.ll | 27 +-
 .../AArch64/Oz-and-forced-vectorize.ll | 12 +-
 .../LoopVectorize/AArch64/aarch64-predication.ll | 11 +-
 .../LoopVectorize/AArch64/aarch64-unroll.ll | 16 +-
 .../LoopVectorize/AArch64/arm64-unroll.ll | 16 +-
 .../LoopVectorize/AArch64/backedge-overflow.ll | 72 +-
 .../AArch64/eliminate-tail-predication.ll | 6 +-
 ...vectorization-factor-for-unprofitable-memops.ll | 60 +-
 .../LoopVectorize/AArch64/gather-cost.ll | 34 +-
 .../LoopVectorize/AArch64/induction-trunc.ll | 14 +-
 .../interleaved-store-of-first-order-recurrence.ll | 27 +-
 .../LoopVectorize/AArch64/interleaved-vs-scalar.ll | 14 +-
 .../LoopVectorize/AArch64/interleaved_cost.ll | 182 +--
 .../AArch64/loop-vectorization-factors.ll | 102 +-
 .../AArch64/loopvectorize_pr33804_double.ll | 54 +-
 .../LoopVectorize/AArch64/masked-call.ll | 230 ++--
 .../LoopVectorize/AArch64/masked-op-cost.ll | 28 +-
 .../AArch64/max-vf-for-interleaved.ll | 18 +-
 .../AArch64/no_vector_instructions.ll | 12 +-
 .../outer_loop_test1_no_explicit_vect_width.ll | 32 +-
 .../Transforms/LoopVectorize/AArch64/pr31900.ll | 30 +-
 .../Transforms/LoopVectorize/AArch64/pr33053.ll | 12 +-
 .../Transforms/LoopVectorize/AArch64/pr36032.ll | 22 +-
 .../AArch64/pr46950-load-cast-context-crash.ll | 8 +-
 .../LoopVectorize/AArch64/predication_costs.ll | 72 +-
 .../LoopVectorize/AArch64/reduction-small-size.ll | 30 +-
 .../AArch64/runtime-check-size-based-threshold.ll | 78 +-
 .../LoopVectorize/AArch64/scalable-alloca.ll | 10 +-
 .../LoopVectorize/AArch64/scalable-call.ll | 88 +-
 .../AArch64/scalable-predicate-instruction.ll | 30 +-
 .../AArch64/scalable-reductions-tf.ll | 12 +-
 .../LoopVectorize/AArch64/scalable-reductions.ll | 94 +-
 .../AArch64/scalable-vectorization-cost-tuning.ll | 14 +-
 .../AArch64/scalable-vectorization.ll | 70 +-
 .../LoopVectorize/AArch64/scalable-vf-hint.ll | 110 +-
 .../AArch64/scalarize-store-with-predication.ll | 12 +-
 .../Transforms/LoopVectorize/AArch64/sdiv-pow2.ll | 10 +-
 .../LoopVectorize/AArch64/select-costs.ll | 8 +-
 .../AArch64/smallest-and-widest-types.ll | 18 +-
 .../LoopVectorize/AArch64/strict-fadd-cost.ll | 32 +-
 .../LoopVectorize/AArch64/strict-fadd-vf1.ll | 10 +-
 .../LoopVectorize/AArch64/sve-basic-vec.ll | 42 +-
 .../LoopVectorize/AArch64/sve-cond-inv-loads.ll | 133 +-
 .../AArch64/sve-gather-scatter-cost.ll | 104 +-
 .../LoopVectorize/AArch64/sve-gather-scatter.ll | 173 ++-
 .../LoopVectorize/AArch64/sve-inductions.ll | 28 +-
 .../LoopVectorize/AArch64/sve-inv-loads.ll | 22 +-
 .../LoopVectorize/AArch64/sve-large-strides.ll | 46 +-
 .../LoopVectorize/AArch64/sve-low-trip-count.ll | 40 +-
 .../LoopVectorize/AArch64/sve-masked-loadstore.ll | 44 +-
 .../AArch64/sve-scalable-load-in-loop.ll | 6 +-
 .../LoopVectorize/AArch64/sve-select-cmp.ll | 36 +-
 .../LoopVectorize/AArch64/sve-strict-fadd-cost.ll | 12 +-
 .../LoopVectorize/AArch64/sve-type-conv.ll | 110 +-
 .../AArch64/sve-vector-reverse-mask4.ll | 16 +-
 .../LoopVectorize/AArch64/sve-vector-reverse.ll | 68 +-
 .../AArch64/sve-widen-extractvalue.ll | 6 +-
 .../AArch64/type-shrinkage-insertelt.ll | 14 +-
 .../LoopVectorize/AArch64/unsafe-vf-hint-remark.ll | 14 +-
 .../AArch64/veclib-calls-libsystem-darwin.ll | 300 ++--
 .../LoopVectorize/AArch64/vector-reverse-mask4.ll | 56 +-
 .../AMDGPU/divergent-runtime-check.ll | 10 +-
 .../Transforms/LoopVectorize/AMDGPU/packed-fp32.ll | 6 +-
 .../Transforms/LoopVectorize/AMDGPU/packed-math.ll | 30 +-
 .../AMDGPU/unroll-in-loop-vectorizer.ll | 8 +-
 .../LoopVectorize/ARM/arm-ieee-vectorize.ll | 124 +-
 .../Transforms/LoopVectorize/ARM/arm-unroll.ll | 12 +-
 .../Transforms/LoopVectorize/ARM/gather-cost.ll | 34 +-
 .../Transforms/LoopVectorize/ARM/gcc-examples.ll | 22 +-
 .../LoopVectorize/ARM/interleaved_cost.ll | 144 +-
 .../LoopVectorize/ARM/mve-interleaved-cost.ll | 1428 ++++++++++----------
 .../Transforms/LoopVectorize/ARM/mve-maskedldst.ll | 46 +-
 .../LoopVectorize/ARM/mve-predstorecost.ll | 54 +-
 .../LoopVectorize/ARM/mve-reduction-predselect.ll | 191 ++-
 .../Transforms/LoopVectorize/ARM/mve-reductions.ll | 447 +++---
 .../Transforms/LoopVectorize/ARM/mve-shiftcost.ll | 8 +-
 .../LoopVectorize/ARM/prefer-tail-loop-folding.ll | 232 ++--
 .../LoopVectorize/ARM/scalar-block-cost.ll | 44 +-
 .../LoopVectorize/ARM/tail-fold-multiple-icmps.ll | 19 +-
 .../LoopVectorize/ARM/tail-folding-loop-hint.ll | 38 +-
 .../LoopVectorize/ARM/tail-folding-prefer-flag.ll | 62 +-
 .../ARM/tail-folding-reductions-allowed.ll | 6 +-
 .../Transforms/LoopVectorize/ARM/width-detect.ll | 12 +-
 ...idate-cm-after-invalidating-interleavegroups.ll | 52 +-
 .../LoopVectorize/Hexagon/maximum-vf-crash.ll | 2 +-
 .../Transforms/LoopVectorize/Hexagon/minimum-vf.ll | 96 +-
 .../LoopVectorize/PowerPC/interleave_IC.ll | 42 +-
 ...terleaved-pointer-runtime-check-unprofitable.ll | 84 +-
 .../LoopVectorize/PowerPC/large-loop-rdx.ll | 28 +-
 .../LoopVectorize/PowerPC/massv-altivec.ll | 24 +-
 .../LoopVectorize/PowerPC/massv-calls.ll | 388 +++---
 .../LoopVectorize/PowerPC/massv-nobuiltin.ll | 12 +-
 .../LoopVectorize/PowerPC/massv-unsupported.ll | 24 +-
 .../optimal-epilog-vectorization-profitability.ll | 42 +-
 .../Transforms/LoopVectorize/PowerPC/pr41179.ll | 26 +-
 .../LoopVectorize/PowerPC/small-loop-rdx.ll | 6 +-
 .../LoopVectorize/PowerPC/stride-vectorization.ll | 14 +-
 .../PowerPC/vectorize-only-for-real.ll | 32 +-
 .../LoopVectorize/PowerPC/vsx-tsvc-s173.ll | 18 +-
 .../LoopVectorize/PowerPC/widened-massv-call.ll | 11 +-
 .../PowerPC/widened-massv-vfabi-attr.ll | 11 +-
 .../LoopVectorize/RISCV/riscv-interleaved.ll | 6 +-
 .../LoopVectorize/RISCV/scalable-reductions.ll | 90 +-
 .../LoopVectorize/RISCV/scalable-vf-hint.ll | 14 +-
 .../RISCV/unroll-in-loop-vectorizer.ll | 14 +-
 .../Transforms/LoopVectorize/SystemZ/addressing.ll | 43 +-
 .../SystemZ/branch-for-predicated-block.ll | 8 +-
 .../SystemZ/load-scalarization-cost-1.ll | 8 +-
 .../SystemZ/load-store-scalarization-cost.ll | 16 +-
 .../SystemZ/mem-interleaving-costs-02.ll | 88 +-
 .../SystemZ/mem-interleaving-costs.ll | 46 +-
 .../LoopVectorize/SystemZ/zero_unroll.ll | 4 +-
 .../test/Transforms/LoopVectorize/VE/disable_lv.ll | 10 +-
 .../LoopVectorize/X86/already-vectorized.ll | 4 +-
 llvm/test/Transforms/LoopVectorize/X86/avx1.ll | 16 +-
 llvm/test/Transforms/LoopVectorize/X86/avx512.ll | 18 +-
 .../LoopVectorize/X86/consecutive-ptr-cg-bug.ll | 8 +-
 .../LoopVectorize/X86/constant-vector-operand.ll | 8 +-
 .../Transforms/LoopVectorize/X86/cost-model.ll | 178 +--
 .../LoopVectorize/X86/float-induction-x86.ll | 420 +++---
 .../test/Transforms/LoopVectorize/X86/fneg-cost.ll | 8 +-
 .../LoopVectorize/X86/fp32_to_uint32-cost-model.ll | 8 +-
 .../LoopVectorize/X86/fp64_to_uint32-cost-model.ll | 10 +-
 .../LoopVectorize/X86/fp_to_sint8-cost-model.ll | 10 +-
 llvm/test/Transforms/LoopVectorize/X86/funclet.ll | 10 +-
 .../Transforms/LoopVectorize/X86/gather-cost.ll | 68 +-
.../LoopVectorize/X86/gather-vs-interleave.ll | 8 +- .../Transforms/LoopVectorize/X86/gcc-examples.ll | 22 +- .../LoopVectorize/X86/int128_no_gather.ll | 26 +- .../LoopVectorize/X86/interleave_short_tc.ll | 10 +- .../X86/interleaved-accesses-large-gap.ll | 38 +- .../X86/interleaved-accesses-waw-dependency.ll | 79 +- .../Transforms/LoopVectorize/X86/interleaving.ll | 126 +- .../LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll | 86 +- .../LoopVectorize/X86/libm-vector-calls-finite.ll | 44 +- .../LoopVectorize/X86/libm-vector-calls.ll | 86 +- .../Transforms/LoopVectorize/X86/max-mstore.ll | 10 +- .../LoopVectorize/X86/min-trip-count-switch.ll | 8 +- .../Transforms/LoopVectorize/X86/mul_slm_16bit.ll | 20 +- .../test/Transforms/LoopVectorize/X86/no-vector.ll | 6 +- .../test/Transforms/LoopVectorize/X86/no_fpmath.ll | 12 +- .../LoopVectorize/X86/no_fpmath_with_hotness.ll | 12 +- .../Transforms/LoopVectorize/X86/nontemporal.ll | 38 +- .../X86/outer_loop_test1_no_explicit_vect_width.ll | 24 +- .../X86/parallel-loops-after-reg2mem.ll | 34 +- .../Transforms/LoopVectorize/X86/parallel-loops.ll | 141 +- .../test/Transforms/LoopVectorize/X86/powof2div.ll | 10 +- llvm/test/Transforms/LoopVectorize/X86/pr23997.ll | 78 +- llvm/test/Transforms/LoopVectorize/X86/pr39160.ll | 4 +- llvm/test/Transforms/LoopVectorize/X86/pr42674.ll | 14 +- ...-select-interleave-count-loop-with-cost-zero.ll | 12 +- .../LoopVectorize/X86/reduction-crash.ll | 12 +- .../LoopVectorize/X86/reduction-small-size.ll | 10 +- .../LoopVectorize/X86/redundant-vf2-cost.ll | 8 +- .../LoopVectorize/X86/reg-usage-debug.ll | 14 +- .../test/Transforms/LoopVectorize/X86/reg-usage.ll | 36 +- .../LoopVectorize/X86/register-assumption.ll | 2 +- .../Transforms/LoopVectorize/X86/runtime-limit.ll | 66 +- .../LoopVectorize/X86/slm-no-vectorize.ll | 10 +- .../Transforms/LoopVectorize/X86/struct-store.ll | 4 +- .../LoopVectorize/X86/svml-calls-finite.ll | 92 +- .../Transforms/LoopVectorize/X86/svml-calls.ll | 220 +-- .../X86/tail_folding_and_assume_safety.ll | 42 +- .../test/Transforms/LoopVectorize/X86/tripcount.ll | 6 +- .../LoopVectorize/X86/uint64_to_fp64-cost-model.ll | 10 +- .../Transforms/LoopVectorize/X86/uniform-phi.ll | 38 +- .../Transforms/LoopVectorize/X86/uniform_load.ll | 12 +- .../Transforms/LoopVectorize/X86/uniformshift.ll | 8 +- .../test/Transforms/LoopVectorize/X86/unroll-pm.ll | 8 +- .../LoopVectorize/X86/unroll-small-loops.ll | 30 +- .../LoopVectorize/X86/unroll_selection.ll | 16 +- .../Transforms/LoopVectorize/X86/veclib-calls.ll | 250 ++-- .../Transforms/LoopVectorize/X86/vect.omp.force.ll | 22 +- .../LoopVectorize/X86/vector-scalar-select-cost.ll | 24 +- .../LoopVectorize/X86/vector_max_bandwidth.ll | 36 +- .../LoopVectorize/X86/vector_ptr_load_store.ll | 50 +- .../X86/vectorization-remarks-loopid-dbg.ll | 16 +- .../X86/vectorization-remarks-missed.ll | 36 +- .../X86/vectorization-remarks-profitable.ll | 28 +- .../LoopVectorize/X86/vectorization-remarks.ll | 16 +- .../LoopVectorize/X86/vectorize-only-for-real.ll | 6 +- .../X86/x86-interleaved-accesses-masked-group.ll | 1154 ++++++++-------- .../x86-interleaved-store-accesses-with-gaps.ll | 206 ++- .../Transforms/LoopVectorize/X86/x86-pr39099.ll | 12 +- .../X86/x86_fp80-interleaved-access.ll | 10 +- .../LoopVectorize/X86/x86_fp80-vector-store.ll | 12 +- .../alias-set-with-uncomputable-bounds.ll | 80 +- llvm/test/Transforms/LoopVectorize/align.ll | 18 +- llvm/test/Transforms/LoopVectorize/assume.ll | 47 +- llvm/test/Transforms/LoopVectorize/bsd_regex.ll | 22 +- 
llvm/test/Transforms/LoopVectorize/calloc.ll | 16 +- .../Transforms/LoopVectorize/cast-induction.ll | 4 +- .../Transforms/LoopVectorize/check-prof-info.ll | 20 +- .../LoopVectorize/conditional-assignment.ll | 8 +- llvm/test/Transforms/LoopVectorize/control-flow.ll | 8 +- .../test/Transforms/LoopVectorize/cpp-new-array.ll | 35 +- llvm/test/Transforms/LoopVectorize/dbg.value.ll | 12 +- .../Transforms/LoopVectorize/dead_instructions.ll | 16 +- llvm/test/Transforms/LoopVectorize/debugloc.ll | 50 +- .../demanded-bits-of-pointer-instruction.ll | 8 +- .../LoopVectorize/diag-missing-instr-debug-loc.ll | 8 +- .../LoopVectorize/diag-with-hotness-info-2.ll | 84 +- .../LoopVectorize/diag-with-hotness-info.ll | 84 +- .../Transforms/LoopVectorize/disable_nonforced.ll | 6 +- .../LoopVectorize/disable_nonforced_enable.ll | 6 +- .../test/Transforms/LoopVectorize/discriminator.ll | 18 +- llvm/test/Transforms/LoopVectorize/exact.ll | 8 +- .../LoopVectorize/explicit_outer_detection.ll | 42 +- .../explicit_outer_nonuniform_inner.ll | 32 +- .../explicit_outer_uniform_diverg_branch.ll | 30 +- .../LoopVectorize/extract-last-veclane.ll | 64 +- .../Transforms/LoopVectorize/fix-reduction-dbg.ll | 6 +- llvm/test/Transforms/LoopVectorize/flags.ll | 22 +- .../Transforms/LoopVectorize/float-induction.ll | 504 ++++--- .../Transforms/LoopVectorize/float-reduction.ll | 24 +- llvm/test/Transforms/LoopVectorize/fneg.ll | 12 +- llvm/test/Transforms/LoopVectorize/followup.ll | 6 +- .../Transforms/LoopVectorize/forked-pointers.ll | 118 +- llvm/test/Transforms/LoopVectorize/funcall.ll | 8 +- llvm/test/Transforms/LoopVectorize/gcc-examples.ll | 306 ++--- .../Transforms/LoopVectorize/gep_with_bitcast.ll | 18 +- llvm/test/Transforms/LoopVectorize/hints-trans.ll | 6 +- llvm/test/Transforms/LoopVectorize/hoist-loads.ll | 22 +- llvm/test/Transforms/LoopVectorize/i8-induction.ll | 6 +- .../test/Transforms/LoopVectorize/icmp-uniforms.ll | 12 +- .../test/Transforms/LoopVectorize/if-conv-crash.ll | 4 +- .../LoopVectorize/if-conversion-edgemasks.ll | 26 +- .../LoopVectorize/if-conversion-reduction.ll | 6 +- .../test/Transforms/LoopVectorize/if-conversion.ll | 24 +- .../LoopVectorize/if-pred-not-when-safe.ll | 56 +- llvm/test/Transforms/LoopVectorize/if-reduction.ll | 152 +-- .../Transforms/LoopVectorize/incorrect-dom-info.ll | 12 +- llvm/test/Transforms/LoopVectorize/increment.ll | 18 +- .../induction-multiple-uses-in-same-instruction.ll | 18 +- .../LoopVectorize/induction-unroll-novec.ll | 14 +- llvm/test/Transforms/LoopVectorize/infiniteloop.ll | 6 +- .../Transforms/LoopVectorize/int_sideeffect.ll | 6 +- .../LoopVectorize/interleaved-accesses-1.ll | 23 +- .../LoopVectorize/interleaved-accesses-2.ll | 16 +- .../LoopVectorize/interleaved-accesses-3.ll | 14 +- .../LoopVectorize/interleaved-accesses-alias.ll | 28 +- .../interleaved-accesses-masked-group.ll | 60 +- .../interleaved-accesses-pred-stores.ll | 120 +- .../interleaved-accesses-uniform-load.ll | 14 +- .../interleaved-acess-with-remarks.ll | 34 +- llvm/test/Transforms/LoopVectorize/intrinsic.ll | 650 ++++----- .../Transforms/LoopVectorize/irregular_type.ll | 8 +- .../Transforms/LoopVectorize/libcall-remark.ll | 12 +- llvm/test/Transforms/LoopVectorize/lifetime.ll | 39 +- llvm/test/Transforms/LoopVectorize/loop-scalars.ll | 105 +- .../Transforms/LoopVectorize/loop-vect-memdep.ll | 16 +- .../Transforms/LoopVectorize/memdep-fold-tail.ll | 32 +- llvm/test/Transforms/LoopVectorize/memdep.ll | 96 +- .../Transforms/LoopVectorize/memory-dep-remarks.ll | 108 +- 
.../Transforms/LoopVectorize/metadata-unroll.ll | 6 +- .../Transforms/LoopVectorize/metadata-width.ll | 24 +- llvm/test/Transforms/LoopVectorize/metadata.ll | 14 +- .../Transforms/LoopVectorize/middle-block-dbg.ll | 16 +- llvm/test/Transforms/LoopVectorize/miniters.ll | 12 +- .../Transforms/LoopVectorize/minmax_reduction.ll | 208 +-- .../LoopVectorize/mixed-precision-remarks.ll | 26 +- .../LoopVectorize/multi-use-reduction-bug.ll | 8 +- .../LoopVectorize/multiple-exits-versioning.ll | 24 +- .../Transforms/LoopVectorize/no_array_bounds.ll | 22 +- .../LoopVectorize/no_array_bounds_scalable.ll | 24 +- .../Transforms/LoopVectorize/no_idiv_reduction.ll | 4 +- .../Transforms/LoopVectorize/no_int_induction.ll | 24 +- llvm/test/Transforms/LoopVectorize/no_switch.ll | 10 +- .../no_switch_disable_vectorization.ll | 10 +- .../Transforms/LoopVectorize/noalias-md-licm.ll | 18 +- llvm/test/Transforms/LoopVectorize/noalias-md.ll | 16 +- .../Transforms/LoopVectorize/noalias-scope-decl.ll | 43 +- .../Transforms/LoopVectorize/nofloat-report.ll | 4 +- llvm/test/Transforms/LoopVectorize/nofloat.ll | 4 +- llvm/test/Transforms/LoopVectorize/non-const-n.ll | 12 +- llvm/test/Transforms/LoopVectorize/nontemporal.ll | 14 +- llvm/test/Transforms/LoopVectorize/nounroll.ll | 18 +- .../LoopVectorize/novect-lcssa-cfg-invalidation.ll | 6 +- llvm/test/Transforms/LoopVectorize/nuw.ll | 14 +- llvm/test/Transforms/LoopVectorize/opt.ll | 6 +- .../optimal-epilog-vectorization-limitations.ll | 16 +- .../optimal-epilog-vectorization-scalable.ll | 6 +- llvm/test/Transforms/LoopVectorize/optsize.ll | 68 +- .../outer-loop-vec-phi-predecessor-order.ll | 20 +- .../Transforms/LoopVectorize/outer_loop_test1.ll | 16 +- .../Transforms/LoopVectorize/outer_loop_test2.ll | 24 +- .../test/Transforms/LoopVectorize/partial-lcssa.ll | 18 +- llvm/test/Transforms/LoopVectorize/phi-cost.ll | 28 +- llvm/test/Transforms/LoopVectorize/pr25281.ll | 28 +- llvm/test/Transforms/LoopVectorize/pr28541.ll | 10 +- .../Transforms/LoopVectorize/pr30806-phi-scev.ll | 25 +- llvm/test/Transforms/LoopVectorize/pr31098.ll | 26 +- llvm/test/Transforms/LoopVectorize/pr31190.ll | 8 +- llvm/test/Transforms/LoopVectorize/pr33706.ll | 30 +- llvm/test/Transforms/LoopVectorize/pr34681.ll | 12 +- llvm/test/Transforms/LoopVectorize/pr36311.ll | 6 +- llvm/test/Transforms/LoopVectorize/pr37248.ll | 6 +- llvm/test/Transforms/LoopVectorize/pr39099.ll | 14 +- .../LoopVectorize/pr44488-predication.ll | 16 +- llvm/test/Transforms/LoopVectorize/pr45525.ll | 6 +- .../LoopVectorize/pr45679-fold-tail-by-masking.ll | 164 +-- .../LoopVectorize/pr46525-expander-insertpoint.ll | 8 +- .../pr47343-expander-lcssa-after-cfg-update.ll | 30 +- llvm/test/Transforms/LoopVectorize/pr48832.ll | 4 +- .../LoopVectorize/pr51614-fold-tail-by-masking.ll | 32 +- .../preserve-dbg-loc-and-loop-metadata.ll | 8 +- .../test/Transforms/LoopVectorize/ptr-induction.ll | 12 +- llvm/test/Transforms/LoopVectorize/ptr_loops.ll | 42 +- llvm/test/Transforms/LoopVectorize/read-only.ll | 20 +- .../LoopVectorize/reduction-inloop-cond.ll | 250 ++-- .../LoopVectorize/reduction-inloop-pred.ll | 530 ++++---- .../LoopVectorize/reduction-inloop-uf4.ll | 166 ++- .../Transforms/LoopVectorize/reduction-inloop.ll | 359 +++-- .../Transforms/LoopVectorize/reduction-order.ll | 2 +- .../LoopVectorize/reduction-predselect.ll | 360 ++--- .../test/Transforms/LoopVectorize/reduction-ptr.ll | 20 +- llvm/test/Transforms/LoopVectorize/reduction.ll | 150 +- .../LoopVectorize/remarks-multi-exit-loops.ll | 6 +- 
.../Transforms/LoopVectorize/remove_metadata.ll | 6 +- .../Transforms/LoopVectorize/reverse_induction.ll | 26 +- llvm/test/Transforms/LoopVectorize/reverse_iter.ll | 6 +- .../LoopVectorize/runtime-check-address-space.ll | 68 +- .../runtime-check-pointer-element-type.ll | 14 +- .../runtime-check-readonly-address-space.ll | 70 +- .../LoopVectorize/runtime-check-readonly.ll | 20 +- .../Transforms/LoopVectorize/runtime-drop-crash.ll | 17 +- llvm/test/Transforms/LoopVectorize/safegep.ll | 24 +- .../Transforms/LoopVectorize/same-base-access.ll | 56 +- .../Transforms/LoopVectorize/scalable-assume.ll | 36 +- .../scalable-first-order-recurrence.ll | 61 +- .../LoopVectorize/scalable-inductions.ll | 106 +- .../scalable-loop-unpredicated-body-scalar-tail.ll | 40 +- .../LoopVectorize/scalable-noalias-scope-decl.ll | 36 +- .../LoopVectorize/scalable-reduction-inloop.ll | 10 +- .../LoopVectorize/scalable-trunc-min-bitwidth.ll | 33 +- .../Transforms/LoopVectorize/scalable-vf-hint.ll | 14 +- .../test/Transforms/LoopVectorize/scalar-select.ll | 12 +- .../LoopVectorize/scev-during-mutation.ll | 8 +- .../LoopVectorize/select-cmp-predicated.ll | 44 +- llvm/test/Transforms/LoopVectorize/select-cmp.ll | 54 +- .../test/Transforms/LoopVectorize/simple-unroll.ll | 6 +- .../Transforms/LoopVectorize/skip-iterations.ll | 28 +- llvm/test/Transforms/LoopVectorize/small-loop.ll | 24 +- .../Transforms/LoopVectorize/start-non-zero.ll | 8 +- .../Transforms/LoopVectorize/store-shuffle-bug.ll | 16 +- .../test/Transforms/LoopVectorize/struct_access.ll | 12 +- llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll | 40 +- llvm/test/Transforms/LoopVectorize/tripcount.ll | 48 +- .../Transforms/LoopVectorize/trunc-reductions.ll | 57 +- .../Transforms/LoopVectorize/undef-inst-bug.ll | 4 +- .../unroll-novec-memcheck-metadata.ll | 12 +- llvm/test/Transforms/LoopVectorize/unroll.ll | 6 +- .../Transforms/LoopVectorize/unroll_nonlatch.ll | 26 +- llvm/test/Transforms/LoopVectorize/unroll_novec.ll | 16 +- .../Transforms/LoopVectorize/unsafe-dep-remark.ll | 28 +- .../LoopVectorize/unsafe-vf-hint-remark.ll | 14 +- .../test/Transforms/LoopVectorize/value-ptr-bug.ll | 24 +- .../LoopVectorize/vect-phiscev-sext-trunc.ll | 32 +- llvm/test/Transforms/LoopVectorize/vect.stats.ll | 22 +- llvm/test/Transforms/LoopVectorize/vector-geps.ll | 46 +- .../LoopVectorize/vectorize-pointer-phis.ll | 66 +- .../Transforms/LoopVectorize/vectorizeVFone.ll | 8 +- .../Transforms/LoopVectorize/version-mem-access.ll | 28 +- .../Transforms/LoopVectorize/vplan-dot-printing.ll | 10 +- .../LoopVectorize/vplan-printing-outer-loop.ll | 8 +- .../vplan-sink-scalars-and-merge-vf1.ll | 10 +- .../vplan-stress-test-no-explict-vf.ll | 8 +- .../vplan-widen-select-instruction.ll | 40 +- .../LoopVectorize/vplan_hcfg_stress_test.ll | 10 +- llvm/test/Transforms/LoopVectorize/write-only.ll | 16 +- 371 files changed, 9572 insertions(+), 9995 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll index e1b922f..279d4e8 100644 --- a/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll +++ b/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll @@ -6,15 +6,15 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ;CHECK: icmp eq <4 x i32> ;CHECK: select <4 x i1> ;CHECK: ret i32 -define i32 @foo(i32 %x, i32 %t, i32* nocapture %A) nounwind uwtable ssp { +define i32 @foo(i32 %x, i32 %t, ptr nocapture %A) nounwind uwtable ssp { entry: %cmp10 = icmp sgt i32 %x, 0 br i1 %cmp10, label 
%for.body, label %for.end for.body: ; preds = %entry, %if.end %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 %tobool = icmp eq i32 %0, 0 br i1 %tobool, label %if.end, label %if.then @@ -28,7 +28,7 @@ if.then: ; preds = %for.body if.end: ; preds = %for.body, %if.then %z.0 = phi i32 [ %add1, %if.then ], [ 9, %for.body ] - store i32 %z.0, i32* %arrayidx, align 4 + store i32 %z.0, ptr %arrayidx, align 4 %indvars.iv.next = add nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %x diff --git a/llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll index 7118668..e6ac85d 100644 --- a/llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll +++ b/llvm/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll @@ -8,41 +8,38 @@ module asm "\09.ident\09\22GCC: (GNU) 4.6.3 LLVM: 3.2svn\22" @b = common global [32000 x float] zeroinitializer, align 16 -define i32 @set1ds(i32 %_n, float* nocapture %arr, float %value, i32 %stride) nounwind uwtable { +define i32 @set1ds(i32 %_n, ptr nocapture %arr, float %value, i32 %stride) nounwind uwtable { entry: %0 = icmp sgt i32 %_n, 0 br i1 %0, label %"3.lr.ph", label %"5" "3.lr.ph": ; preds = %entry - %1 = bitcast float* %arr to i8* - %2 = sext i32 %stride to i64 + %1 = sext i32 %stride to i64 br label %"3" "3": ; preds = %"3.lr.ph", %"3" %indvars.iv = phi i64 [ 0, %"3.lr.ph" ], [ %indvars.iv.next, %"3" ] - %3 = shl nsw i64 %indvars.iv, 2 - %4 = getelementptr inbounds i8, i8* %1, i64 %3 - %5 = bitcast i8* %4 to float* - store float %value, float* %5, align 4 - %indvars.iv.next = add i64 %indvars.iv, %2 - %6 = trunc i64 %indvars.iv.next to i32 - %7 = icmp slt i32 %6, %_n - br i1 %7, label %"3", label %"5" + %2 = shl nsw i64 %indvars.iv, 2 + %3 = getelementptr inbounds i8, ptr %arr, i64 %2 + store float %value, ptr %3, align 4 + %indvars.iv.next = add i64 %indvars.iv, %1 + %4 = trunc i64 %indvars.iv.next to i32 + %5 = icmp slt i32 %4, %_n + br i1 %5, label %"3", label %"5" "5": ; preds = %"3", %entry ret i32 0 } -define i32 @init(i8* nocapture %name) unnamed_addr nounwind uwtable { +define i32 @init(ptr nocapture %name) unnamed_addr nounwind uwtable { entry: br label %"3" "3": ; preds = %"3", %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %"3" ] %0 = shl nsw i64 %indvars.iv, 2 - %1 = getelementptr inbounds i8, i8* bitcast (float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 16000) to i8*), i64 %0 - %2 = bitcast i8* %1 to float* - store float -1.000000e+00, float* %2, align 4 + %1 = getelementptr inbounds i8, ptr getelementptr inbounds ([32000 x float], ptr @b, i64 0, i64 16000), i64 %0 + store float -1.000000e+00, ptr %1, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 16000 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll index a7bf052..798a9cf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/Oz-and-forced-vectorize.ll @@ -7,7 +7,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple 
= "arm64-apple-ios5.0.0" -define void @foo(float* noalias nocapture %ptrA, float* noalias nocapture readonly %ptrB, i64 %size) { +define void @foo(ptr noalias nocapture %ptrA, ptr noalias nocapture readonly %ptrB, i64 %size) { ; CHECK-LABEL: @foo( ; CHECK: fmul <4 x float> ; @@ -20,12 +20,12 @@ for.cond: ; preds = %for.body, %entry br i1 %exitcond, label %for.cond.cleanup, label %for.body for.body: ; preds = %for.cond - %arrayidx = getelementptr inbounds float, float* %ptrB, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %ptrA, i64 %indvars.iv - %1 = load float, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %ptrB, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %ptrA, i64 %indvars.iv + %1 = load float, ptr %arrayidx2, align 4 %mul3 = fmul float %0, %1 - store float %mul3, float* %arrayidx2, align 4 + store float %mul3, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 br label %for.cond, !llvm.loop !0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll index 5309f03..cc6ef0f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-predication.ll @@ -16,16 +16,15 @@ target triple = "aarch64--linux-gnu" ; COST: LV: Found an estimated cost of 4 for VF 2 For instruction: %var4 = udiv i64 %var2, %var3 ; ; -define i64 @predicated_udiv_scalarized_operand(i64* %a, i64 %x) optsize { +define i64 @predicated_udiv_scalarized_operand(ptr %a, i64 %x) optsize { ; CHECK-LABEL: @predicated_udiv_scalarized_operand( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[ENTRY]] ], [ [[TMP17:%.*]], [[PRED_UDIV_CONTINUE2]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i64> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] @@ -64,8 +63,8 @@ entry: for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] %r = phi i64 [ 0, %entry ], [ %var6, %for.inc ] - %var0 = getelementptr inbounds i64, i64* %a, i64 %i - %var2 = load i64, i64* %var0, align 4 + %var0 = getelementptr inbounds i64, ptr %a, i64 %i + %var2 = load i64, ptr %var0, align 4 %cond0 = icmp sgt i64 %var2, 0 br i1 %cond0, label %if.then, label %for.inc diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll index b9f97b0..4df0ad9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll @@ -2,7 +2,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; Function Attrs: nounwind -define i32* @array_add(i32* 
noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) { +define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr %c, i32 %size) { ;CHECK-LABEL: array_add ;CHECK: load <4 x i32> ;CHECK: load <4 x i32> @@ -22,13 +22,13 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 - %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv - store i32 %add, i32* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv + store i32 %add, ptr %arrayidx4, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %size @@ -38,5 +38,5 @@ for.end.loopexit: ; preds = %for.body br label %for.end for.end: ; preds = %for.end.loopexit, %entry - ret i32* %c + ret ptr %c } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll index 1e91674..8e963fc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll @@ -2,7 +2,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; Function Attrs: nounwind -define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) { +define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr %c, i32 %size) { ;CHECK-LABEL: array_add ;CHECK: load <4 x i32> ;CHECK: load <4 x i32> @@ -22,13 +22,13 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 - %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv - store i32 %add, i32* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv + store i32 %add, ptr %arrayidx4, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %size @@ -38,5 +38,5 @@ for.end.loopexit: ; preds = %for.body br label %for.end for.end: ; preds = %for.end.loopexit, %entry - ret i32* %c + ret ptr %c } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll index d7d8bc8..c47a630 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll @@ -12,9 +12,9 @@ ; CHECK-LABEL: test_sge ; CHECK-LABEL: vector.scevcheck ; CHECK-LABEL: vector.body -define void @test_sge(i32* noalias %A, - i32* noalias %B, - i32* noalias %C, i32 %N) { +define void @test_sge(ptr noalias %A, + ptr noalias %B, + ptr noalias %C, i32 %N) { entry: %cmp13 = icmp eq i32 %N, 0 br i1 %cmp13, label %for.end, label %for.body.preheader @@ -27,15 +27,15 @@ for.body: %indvars.next = add i16 %indvars.iv, 1 %indvars.ext = zext i16 %indvars.iv to i32 - %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext - %1 = load i32, i32* %arrayidx3, align 4 + %arrayidx = getelementptr inbounds i32, ptr %B, i32 %indvars.ext + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %indvars.ext + %1 = load i32, ptr %arrayidx3, align 4 %mul4 = mul i32 %1, %0 - %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext - store i32 %mul4, i32* %arrayidx7, align 4 + %arrayidx7 = getelementptr inbounds i32, ptr %A, i32 %indvars.ext + store i32 %mul4, ptr %arrayidx7, align 4 %exitcond = icmp sge i32 %indvars.ext, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -50,9 +50,9 @@ for.end: ; CHECK-LABEL: test_uge ; CHECK-LABEL: vector.scevcheck ; CHECK-LABEL: vector.body -define void @test_uge(i32* noalias %A, - i32* noalias %B, - i32* noalias %C, i32 %N, i32 %Offset) { +define void @test_uge(ptr noalias %A, + ptr noalias %B, + ptr noalias %C, i32 %N, i32 %Offset) { entry: %cmp13 = icmp eq i32 %N, 0 br i1 %cmp13, label %for.end, label %for.body.preheader @@ -67,15 +67,15 @@ for.body: %indvars.ext = sext i16 %indvars.iv to i32 %indvars.access = add i32 %Offset, %indvars.ext - %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.access - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.access - %1 = load i32, i32* %arrayidx3, align 4 + %arrayidx = getelementptr inbounds i32, ptr %B, i32 %indvars.access + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %indvars.access + %1 = load i32, ptr %arrayidx3, align 4 %mul4 = add i32 %1, %0 - %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.access - store i32 %mul4, i32* %arrayidx7, align 4 + %arrayidx7 = getelementptr inbounds i32, ptr %A, i32 %indvars.access + store i32 %mul4, ptr %arrayidx7, align 4 %exitcond = icmp uge i32 %indvars.ext, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -90,9 +90,9 @@ for.end: ; CHECK-LABEL: test_ule ; CHECK-LABEL: vector.scevcheck ; CHECK-LABEL: vector.body -define void @test_ule(i32* noalias %A, - i32* noalias %B, - i32* noalias %C, i32 %N, +define void @test_ule(ptr noalias %A, + ptr noalias %B, + ptr noalias %C, i32 %N, i16 %M) { entry: %cmp13 = icmp eq i32 %N, 0 @@ -106,15 +106,15 @@ for.body: %indvars.next = sub i16 %indvars.iv, 1 %indvars.ext = zext i16 %indvars.iv to i32 - %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext - %1 = load i32, i32* %arrayidx3, align 4 + %arrayidx = getelementptr inbounds i32, ptr %B, i32 %indvars.ext + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %indvars.ext + %1 = load i32, ptr %arrayidx3, align 4 %mul4 = mul i32 %1, %0 - %arrayidx7 
= getelementptr inbounds i32, i32* %A, i32 %indvars.ext - store i32 %mul4, i32* %arrayidx7, align 4 + %arrayidx7 = getelementptr inbounds i32, ptr %A, i32 %indvars.ext + store i32 %mul4, ptr %arrayidx7, align 4 %exitcond = icmp ule i32 %indvars.ext, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -129,9 +129,9 @@ for.end: ; CHECK-LABEL: test_sle ; CHECK-LABEL: vector.scevcheck ; CHECK-LABEL: vector.body -define void @test_sle(i32* noalias %A, - i32* noalias %B, - i32* noalias %C, i32 %N, +define void @test_sle(ptr noalias %A, + ptr noalias %B, + ptr noalias %C, i32 %N, i16 %M) { entry: %cmp13 = icmp eq i32 %N, 0 @@ -145,15 +145,15 @@ for.body: %indvars.next = sub i16 %indvars.iv, 1 %indvars.ext = sext i16 %indvars.iv to i32 - %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext - %1 = load i32, i32* %arrayidx3, align 4 + %arrayidx = getelementptr inbounds i32, ptr %B, i32 %indvars.ext + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %C, i32 %indvars.ext + %1 = load i32, ptr %arrayidx3, align 4 %mul4 = mul i32 %1, %0 - %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext - store i32 %mul4, i32* %arrayidx7, align 4 + %arrayidx7 = getelementptr inbounds i32, ptr %A, i32 %indvars.ext + store i32 %mul4, ptr %arrayidx7, align 4 %exitcond = icmp sle i32 %indvars.ext, %N br i1 %exitcond, label %for.end.loopexit, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll index 2236cfe..c0b2073 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll @@ -14,14 +14,14 @@ target triple = "aarch64" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" -define void @f1(i32* %A) #0 { +define void @f1(ptr %A) #0 { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %A, i64 %iv - store i32 1, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %A, i64 %iv + store i32 1, ptr %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp ne i64 %iv.next, 1024 br i1 %exitcond, label %for.body, label %exit diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll index 2a6cb5a..113a361 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll @@ -7,7 +7,7 @@ ; maximum VF for NEON is calculated by 128/size of smallest type in loop. ; And while we don't have an instruction to load 4 x i8, vectorization ; might still be profitable. 
-define void @test_load_i8_store_i32(i8* noalias %src, i32* noalias %dst, i32 %off, i64 %N) { +define void @test_load_i8_store_i32(ptr noalias %src, ptr noalias %dst, i32 %off, i64 %N) { ; CHECK-LABEL: @test_load_i8_store_i32( ; CHECK: <16 x i8> ; @@ -16,12 +16,12 @@ entry: loop: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] - %gep.src = getelementptr inbounds i8, i8* %src, i64 %iv - %lv = load i8, i8* %gep.src, align 1 + %gep.src = getelementptr inbounds i8, ptr %src, i64 %iv + %lv = load i8, ptr %gep.src, align 1 %lv.ext = zext i8 %lv to i32 %add = add i32 %lv.ext, %off - %gep.dst = getelementptr inbounds i32, i32* %dst, i64 %iv - store i32 %add, i32* %gep.dst + %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv + store i32 %add, ptr %gep.dst %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %exit, label %loop @@ -31,7 +31,7 @@ exit: } ; Same as test_load_i8_store_i32, but with types flipped for load and store. -define void @test_load_i32_store_i8(i32* noalias %src, i8* noalias %dst, i32 %off, i64 %N) { +define void @test_load_i32_store_i8(ptr noalias %src, ptr noalias %dst, i32 %off, i64 %N) { ; CHECK-LABEL: @test_load_i32_store_i8( ; CHECK: <16 x i8> ; @@ -40,12 +40,12 @@ entry: loop: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] - %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv - %lv = load i32, i32* %gep.src, align 1 + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %lv = load i32, ptr %gep.src, align 1 %add = add i32 %lv, %off %add.trunc = trunc i32 %add to i8 - %gep.dst = getelementptr inbounds i8, i8* %dst, i64 %iv - store i8 %add.trunc, i8* %gep.dst + %gep.dst = getelementptr inbounds i8, ptr %dst, i64 %iv + store i8 %add.trunc, ptr %gep.dst %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %exit, label %loop @@ -55,7 +55,7 @@ exit: } ; All memory operations use i32, all memory operations are profitable with VF 4. -define void @test_load_i32_store_i32(i32* noalias %src, i32* noalias %dst, i8 %off, i64 %N) { +define void @test_load_i32_store_i32(ptr noalias %src, ptr noalias %dst, i8 %off, i64 %N) { ; CHECK-LABEL: @test_load_i32_store_i32( ; CHECK: vector.body: ; CHECK: <4 x i32> @@ -65,13 +65,13 @@ entry: loop: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] - %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv - %lv = load i32, i32* %gep.src, align 1 + %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv + %lv = load i32, ptr %gep.src, align 1 %lv.trunc = trunc i32 %lv to i8 %add = add i8 %lv.trunc, %off %add.ext = zext i8 %add to i32 - %gep.dst = getelementptr inbounds i32, i32* %dst, i64 %iv - store i32 %add.ext, i32* %gep.dst + %gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv + store i32 %add.ext, ptr %gep.dst %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %exit, label %loop @@ -83,7 +83,7 @@ exit: ; Test with loop body that requires a large number of vector registers if the ; vectorization factor is large. Make sure the register estimates limit the ; vectorization factor. 
-define void @test_load_i8_store_i64_large(i8* noalias %src, i64* noalias %dst, i64* noalias %dst.2, i64* noalias %dst.3, i64* noalias %dst.4, i64* noalias %dst.5, i64%off, i64 %off.2, i64 %N) { +define void @test_load_i8_store_i64_large(ptr noalias %src, ptr noalias %dst, ptr noalias %dst.2, ptr noalias %dst.3, ptr noalias %dst.4, ptr noalias %dst.5, i64%off, i64 %off.2, i64 %N) { ; CHECK-LABEL: @test_load_i8_store_i64_large ; CHECK: <8 x i64> ; @@ -92,28 +92,28 @@ entry: loop: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] - %gep.src = getelementptr inbounds i8, i8* %src, i64 %iv - %gep.dst.3 = getelementptr inbounds i64, i64* %dst.3, i64 %iv - %lv.dst.3 = load i64, i64* %gep.dst.3, align 1 - %gep.dst.5 = getelementptr inbounds i64, i64* %dst.5, i64 %iv - %lv.dst.5 = load i64, i64* %gep.dst.3, align 1 + %gep.src = getelementptr inbounds i8, ptr %src, i64 %iv + %gep.dst.3 = getelementptr inbounds i64, ptr %dst.3, i64 %iv + %lv.dst.3 = load i64, ptr %gep.dst.3, align 1 + %gep.dst.5 = getelementptr inbounds i64, ptr %dst.5, i64 %iv + %lv.dst.5 = load i64, ptr %gep.dst.3, align 1 - %lv = load i8, i8* %gep.src, align 1 + %lv = load i8, ptr %gep.src, align 1 %lv.ext = zext i8 %lv to i64 %add = add i64 %lv.ext, %off %add.2 = add i64 %add, %off.2 - %gep.dst = getelementptr inbounds i64, i64* %dst, i64 %iv - %gep.dst.2 = getelementptr inbounds i64, i64* %dst.2, i64 %iv + %gep.dst = getelementptr inbounds i64, ptr %dst, i64 %iv + %gep.dst.2 = getelementptr inbounds i64, ptr %dst.2, i64 %iv %add.3 = add i64 %add.2, %lv.dst.3 %add.4 = add i64 %add.3, %add - %gep.dst.4 = getelementptr inbounds i64, i64* %dst.4, i64 %iv + %gep.dst.4 = getelementptr inbounds i64, ptr %dst.4, i64 %iv %add.5 = add i64 %add.2, %lv.dst.5 - store i64 %add.2, i64* %gep.dst.2 - store i64 %add, i64* %gep.dst - store i64 %add.3, i64* %gep.dst.3 - store i64 %add.4, i64* %gep.dst.4 - store i64 %add.5, i64* %gep.dst.5 + store i64 %add.2, ptr %gep.dst.2 + store i64 %add, ptr %gep.dst + store i64 %add.3, ptr %gep.dst.3 + store i64 %add.4, ptr %gep.dst.4 + store i64 %add.5, ptr %gep.dst.5 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll index 4671c6f..8079c1e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll @@ -30,32 +30,32 @@ for.body: %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ] %add = add i64 %v.055, %offset %mul = mul i64 %add, 3 - %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %mul - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds [512 x float], [512 x float]* @kernel, i64 0, i64 %v.055 - %1 = load float, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %mul + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 %v.055 + %1 = load float, ptr %arrayidx2, align 4 %mul3 = fmul fast float %0, %1 - %arrayidx4 = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i64 0, i64 %v.055 - %2 = load float, float* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 %v.055 + %2 = load float, ptr %arrayidx4, align 4 %mul5 = fmul fast float %mul3, %2 - %arrayidx6 = getelementptr inbounds [512 x float], [512 x float]* @kernel3, 
i64 0, i64 %v.055 - %3 = load float, float* %arrayidx6, align 4 + %arrayidx6 = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 %v.055 + %3 = load float, ptr %arrayidx6, align 4 %mul7 = fmul fast float %mul5, %3 - %arrayidx8 = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i64 0, i64 %v.055 - %4 = load float, float* %arrayidx8, align 4 + %arrayidx8 = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 %v.055 + %4 = load float, ptr %arrayidx8, align 4 %mul9 = fmul fast float %mul7, %4 %add10 = fadd fast float %r.057, %mul9 %arrayidx.sum = add i64 %mul, 1 - %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum - %5 = load float, float* %arrayidx11, align 4 + %arrayidx11 = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %arrayidx.sum + %5 = load float, ptr %arrayidx11, align 4 %mul13 = fmul fast float %1, %5 %mul15 = fmul fast float %2, %mul13 %mul17 = fmul fast float %3, %mul15 %mul19 = fmul fast float %4, %mul17 %add20 = fadd fast float %g.056, %mul19 %arrayidx.sum52 = add i64 %mul, 2 - %arrayidx21 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52 - %6 = load float, float* %arrayidx21, align 4 + %arrayidx21 = getelementptr inbounds [1536 x float], ptr @src_data, i64 0, i64 %arrayidx.sum52 + %6 = load float, ptr %arrayidx21, align 4 %mul23 = fmul fast float %1, %6 %mul25 = fmul fast float %2, %mul23 %mul27 = fmul fast float %3, %mul25 @@ -78,8 +78,8 @@ for.end: %r.0.lcssa = phi i8 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] %g.0.lcssa = phi i8 [ %phitmp60, %for.cond.for.end_crit_edge ], [ 0, %entry ] %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ] - store i8 %r.0.lcssa, i8* @r_, align 1 - store i8 %g.0.lcssa, i8* @g_, align 1 - store i8 %b.0.lcssa, i8* @b_, align 1 + store i8 %r.0.lcssa, ptr @r_, align 1 + store i8 %g.0.lcssa, ptr @g_, align 1 + store i8 %b.0.lcssa, ptr @b_, align 1 ret void } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll index 99a5909..fe9631a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll @@ -11,22 +11,22 @@ target triple = "aarch64--linux-gnu" ; CHECK-NEXT: [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 5 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDUCTION]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDUCTION1]] to i32 -; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i32, i32* %dst, i32 [[TMP4]] -; CHECK-NEXT: [[GEP1:%.+]] = getelementptr inbounds i32, i32* %dst, i32 [[TMP5]] -; CHECK-NEXT: store i32 0, i32* [[GEP0]], align 4 -; CHECK-NEXT: store i32 0, i32* [[GEP1]], align 4 +; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i32, ptr %dst, i32 [[TMP4]] +; CHECK-NEXT: [[GEP1:%.+]] = getelementptr inbounds i32, ptr %dst, i32 [[TMP5]] +; CHECK-NEXT: store i32 0, ptr [[GEP0]], align 4 +; CHECK-NEXT: store i32 0, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body ; -define void @non_primary_iv_trunc_free(i64 %n, i32* %dst) { +define void @non_primary_iv_trunc_free(i64 %n, ptr %dst) { entry: br label %for.body for.body: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] %tmp0 = trunc i64 %i to i32 - %gep.dst = getelementptr inbounds i32, i32* %dst, i32 %tmp0 - store i32 0, i32* %gep.dst + %gep.dst = 
getelementptr inbounds i32, ptr %dst, i32 %tmp0 + store i32 0, ptr %gep.dst %i.next = add nuw nsw i64 %i, 5 %cond = icmp slt i64 %i.next, %n br i1 %cond, label %for.body, label %for.end diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll index 5646ce6..7878121 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll @@ -2,26 +2,25 @@ ; In the loop below, both the current and previous values of a first-order ; recurrence are stored in an interleave group. -define void @interleaved_store_first_order_recurrence(i32* noalias %src, i32* %dst) { +define void @interleaved_store_first_order_recurrence(ptr noalias %src, ptr %dst) { ; CHECK-LABEL: @interleaved_store_first_order_recurrence( ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[BROADCAST_SPLAT:%.*]], %vector.body ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[SRC:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC:%.*]], align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 2 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i32 -2 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <12 x i32>* +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -2 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> [[TMP2]], <8 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> [[TMP10]], <12 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i32> [[TMP11]], <12 x i32> poison, <12 x i32> -; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP12]], label %middle.block, label %vector.body @@ -32,14 +31,14 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %for = phi i32 [ 99, %entry ],[ %for.next, %loop ] - %for.next = load i32, i32* %src, align 4 + %for.next = load i32, ptr %src, align 4 %off = mul nuw nsw i64 %iv, 3 - %gep.1 = getelementptr inbounds i32, i32* %dst, i64 %off - store i32 0, i32* %gep.1, align 4 - %gep.2 = getelementptr inbounds i32, i32* %gep.1, i64 1 - store i32 %for, i32* %gep.2, align 4 - %gep.3 = 
getelementptr inbounds i32, i32* %gep.1, i64 2 - store i32 %for.next, i32* %gep.3, align 4 + %gep.1 = getelementptr inbounds i32, ptr %dst, i64 %off + store i32 0, ptr %gep.1, align 4 + %gep.2 = getelementptr inbounds i32, ptr %gep.1, i64 1 + store i32 %for, ptr %gep.2, align 4 + %gep.3 = getelementptr inbounds i32, ptr %gep.1, i64 2 + store i32 %for.next, ptr %gep.3, align 4 %iv.next = add nuw nsw i64 %iv, 1 %ec = icmp eq i64 %iv.next, 1000 br i1 %ec, label %exit, label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll index 4add3f6..0184daf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll @@ -17,19 +17,19 @@ target triple = "aarch64--linux-gnu" ; CHECK: insertelement <2 x i8> [[INSERT]], i8 [[LOAD2]], i32 1 ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body -define void @test(%pair* %p, i8* %q, i64 %n) { +define void @test(ptr %p, ptr %q, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr %pair, %pair* %p, i64 %i, i32 0 - %tmp1 = load i8, i8* %tmp0, align 1 - %tmp2 = getelementptr %pair, %pair* %p, i64 %i, i32 1 - %tmp3 = load i8, i8* %tmp2, align 1 + %tmp0 = getelementptr %pair, ptr %p, i64 %i, i32 0 + %tmp1 = load i8, ptr %tmp0, align 1 + %tmp2 = getelementptr %pair, ptr %p, i64 %i, i32 1 + %tmp3 = load i8, ptr %tmp2, align 1 %add = add i8 %tmp1, %tmp3 - %qi = getelementptr i8, i8* %q, i64 %i - store i8 %add, i8* %qi, align 1 + %qi = getelementptr i8, ptr %q, i64 %i + store i8 %add, ptr %qi, align 1 %i.next = add nuw nsw i64 %i, 1 %cond = icmp eq i64 %i.next, %n br i1 %cond, label %for.end, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll index 0d145ef..c61ddbc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll @@ -8,28 +8,28 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnueabi" %i8.2 = type {i8, i8} -define void @i8_factor_2(%i8.2* %data, i64 %n) { +define void @i8_factor_2(ptr %data, i64 %n) { entry: br label %for.body ; VF_8-LABEL: Checking a loop in 'i8_factor_2' -; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1 +; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp0, align 1 +; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 0, ptr %tmp1, align 1 ; VF_16-LABEL: Checking a loop in 'i8_factor_2' -; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost 
of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 2 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1 +; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp0, align 1 +; VF_16-NEXT: Found an estimated cost of 2 for VF 16 For instruction: store i8 0, ptr %tmp1, align 1 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 0 - %tmp1 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 1 - %tmp2 = load i8, i8* %tmp0, align 1 - %tmp3 = load i8, i8* %tmp1, align 1 - store i8 0, i8* %tmp0, align 1 - store i8 0, i8* %tmp1, align 1 + %tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0 + %tmp1 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 1 + %tmp2 = load i8, ptr %tmp0, align 1 + %tmp3 = load i8, ptr %tmp1, align 1 + store i8 0, ptr %tmp0, align 1 + store i8 0, ptr %tmp1, align 1 %i.next = add nuw nsw i64 %i, 1 %cond = icmp slt i64 %i.next, %n br i1 %cond, label %for.body, label %for.end @@ -39,33 +39,33 @@ for.end: } %i16.2 = type {i16, i16} -define void @i16_factor_2(%i16.2* %data, i64 %n) { +define void @i16_factor_2(ptr %data, i64 %n) { entry: br label %for.body ; VF_4-LABEL: Checking a loop in 'i16_factor_2' -; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2 +; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 +; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 +; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp0, align 2 +; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i16 0, ptr %tmp1, align 2 ; VF_8-LABEL: Checking a loop in 'i16_factor_2' -; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2 +; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp0, align 2 +; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i16 0, ptr %tmp1, align 2 ; VF_16-LABEL: Checking a loop in 'i16_factor_2' -; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: 
store i16 0, i16* %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2 +; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp0, align 2 +; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i16 0, ptr %tmp1, align 2 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 0 - %tmp1 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 1 - %tmp2 = load i16, i16* %tmp0, align 2 - %tmp3 = load i16, i16* %tmp1, align 2 - store i16 0, i16* %tmp0, align 2 - store i16 0, i16* %tmp1, align 2 + %tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0 + %tmp1 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 1 + %tmp2 = load i16, ptr %tmp0, align 2 + %tmp3 = load i16, ptr %tmp1, align 2 + store i16 0, ptr %tmp0, align 2 + store i16 0, ptr %tmp1, align 2 %i.next = add nuw nsw i64 %i, 1 %cond = icmp slt i64 %i.next, %n br i1 %cond, label %for.body, label %for.end @@ -75,38 +75,38 @@ for.end: } %i32.2 = type {i32, i32} -define void @i32_factor_2(%i32.2* %data, i64 %n) { +define void @i32_factor_2(ptr %data, i64 %n) { entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i32_factor_2' -; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4 +; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 +; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 +; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp0, align 4 +; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i32 0, ptr %tmp1, align 4 ; VF_4-LABEL: Checking a loop in 'i32_factor_2' -; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4 +; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 +; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 +; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp0, align 4 +; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i32 0, ptr %tmp1, align 4 ; VF_8-LABEL: Checking a loop in 'i32_factor_2' -; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, 
i32* %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4 +; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp0, align 4 +; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i32 0, ptr %tmp1, align 4 ; VF_16-LABEL: Checking a loop in 'i32_factor_2' -; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4 +; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp0, align 4 +; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i32 0, ptr %tmp1, align 4 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 0 - %tmp1 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 1 - %tmp2 = load i32, i32* %tmp0, align 4 - %tmp3 = load i32, i32* %tmp1, align 4 - store i32 0, i32* %tmp0, align 4 - store i32 0, i32* %tmp1, align 4 + %tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0 + %tmp1 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 1 + %tmp2 = load i32, ptr %tmp0, align 4 + %tmp3 = load i32, ptr %tmp1, align 4 + store i32 0, ptr %tmp0, align 4 + store i32 0, ptr %tmp1, align 4 %i.next = add nuw nsw i64 %i, 1 %cond = icmp slt i64 %i.next, %n br i1 %cond, label %for.body, label %for.end @@ -116,38 +116,38 @@ for.end: } %i64.2 = type {i64, i64} -define void @i64_factor_2(%i64.2* %data, i64 %n) { +define void @i64_factor_2(ptr %data, i64 %n) { entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i64_factor_2' -; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8 ; VF_4-LABEL: Checking a loop in 'i64_factor_2' -; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* 
%tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp0, align 8 +; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i64 0, ptr %tmp1, align 8 ; VF_8-LABEL: Checking a loop in 'i64_factor_2' -; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp0, align 8 +; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store i64 0, ptr %tmp1, align 8 ; VF_16-LABEL: Checking a loop in 'i64_factor_2' -; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp0, align 8 +; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store i64 0, ptr %tmp1, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 0 - %tmp1 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 1 - %tmp2 = load i64, i64* %tmp0, align 8 - %tmp3 = load i64, i64* %tmp1, align 8 - store i64 0, i64* %tmp0, align 8 - store i64 0, i64* %tmp1, align 8 + %tmp0 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 0 + %tmp1 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 1 + %tmp2 = load i64, ptr %tmp0, align 8 + %tmp3 = load i64, ptr %tmp1, align 8 + store i64 0, ptr %tmp0, align 8 + store i64 0, ptr %tmp1, align 8 %i.next = add nuw nsw i64 %i, 1 %cond = icmp slt i64 %i.next, %n br i1 %cond, label %for.body, label %for.end @@ -157,7 +157,7 @@ for.end: } %i64.8 = type {i64, i64, i64, i64, i64, i64, i64, i64} -define void @i64_factor_8(%i64.8* %data, i64 %n) { +define void @i64_factor_8(ptr %data, i64 %n) { entry: br label %for.body @@ -168,18 +168,18 @@ entry: ; gaps. 
; ; VF_2-LABEL: Checking a loop in 'i64_factor_8' -; VF_2: Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_2: Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8 +; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr inbounds %i64.8, %i64.8* %data, i64 %i, i32 2 - %tmp1 = getelementptr inbounds %i64.8, %i64.8* %data, i64 %i, i32 6 - %tmp2 = load i64, i64* %tmp0, align 8 - %tmp3 = load i64, i64* %tmp1, align 8 - store i64 0, i64* %tmp0, align 8 - store i64 0, i64* %tmp1, align 8 + %tmp0 = getelementptr inbounds %i64.8, ptr %data, i64 %i, i32 2 + %tmp1 = getelementptr inbounds %i64.8, ptr %data, i64 %i, i32 6 + %tmp2 = load i64, ptr %tmp0, align 8 + %tmp3 = load i64, ptr %tmp1, align 8 + store i64 0, ptr %tmp0, align 8 + store i64 0, ptr %tmp1, align 8 %i.next = add nuw nsw i64 %i, 1 %cond = icmp slt i64 %i.next, %n br i1 %cond, label %for.body, label %for.end diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index 63d83a2..e35bc97 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -4,11 +4,11 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64" ; CHECK-LABEL: @add_a( -; CHECK: load <16 x i8>, <16 x i8>* +; CHECK: load <16 x i8>, ptr ; CHECK: add <16 x i8> ; CHECK: store <16 x i8> ; Function Attrs: nounwind -define void @add_a(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 { +define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { entry: %cmp8 = icmp sgt i32 %len, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup @@ -18,13 +18,13 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv - %0 = load i8, i8* %arrayidx + %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv + %0 = load i8, ptr %arrayidx %conv = zext i8 %0 to i32 %add = add nuw nsw i32 %conv, 2 %conv1 = trunc i32 %add to i8 - %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv - store i8 %conv1, i8* %arrayidx3 + %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv + store i8 %conv1, ptr %arrayidx3 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -34,11 +34,11 @@ for.body: ; preds = %entry, %for.body ; Ensure that we preserve nuw/nsw if we're not shrinking the values we're ; working with. 
; CHECK-LABEL: @add_a1( -; CHECK: load <16 x i8>, <16 x i8>* +; CHECK: load <16 x i8>, ptr ; CHECK: add nuw nsw <16 x i8> ; CHECK: store <16 x i8> ; Function Attrs: nounwind -define void @add_a1(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 { +define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { entry: %cmp8 = icmp sgt i32 %len, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup @@ -48,11 +48,11 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv - %0 = load i8, i8* %arrayidx + %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv + %0 = load i8, ptr %arrayidx %add = add nuw nsw i8 %0, 2 - %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv - store i8 %add, i8* %arrayidx3 + %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv + store i8 %add, ptr %arrayidx3 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -60,11 +60,11 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: @add_b( -; CHECK: load <8 x i16>, <8 x i16>* +; CHECK: load <8 x i16>, ptr ; CHECK: add <8 x i16> ; CHECK: store <8 x i16> ; Function Attrs: nounwind -define void @add_b(i16* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 { +define void @add_b(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { entry: %cmp9 = icmp sgt i32 %len, 0 br i1 %cmp9, label %for.body, label %for.cond.cleanup @@ -74,13 +74,13 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv - %0 = load i16, i16* %arrayidx + %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv + %0 = load i16, ptr %arrayidx %conv8 = zext i16 %0 to i32 %add = add nuw nsw i32 %conv8, 2 %conv1 = trunc i32 %add to i16 - %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv - store i16 %conv1, i16* %arrayidx3 + %arrayidx3 = getelementptr inbounds i16, ptr %q, i64 %indvars.iv + store i16 %conv1, ptr %arrayidx3 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -88,11 +88,11 @@ for.body: ; preds = %entry, %for.body } ; CHECK-LABEL: @add_c( -; CHECK: load <8 x i8>, <8 x i8>* +; CHECK: load <8 x i8>, ptr ; CHECK: add <8 x i16> ; CHECK: store <8 x i16> ; Function Attrs: nounwind -define void @add_c(i8* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 { +define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { entry: %cmp8 = icmp sgt i32 %len, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup @@ -102,13 +102,13 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv - %0 = load i8, i8* %arrayidx + %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv + %0 = load i8, ptr %arrayidx %conv = zext i8 %0 to i32 %add = add nuw nsw i32 %conv, 2 %conv1 = trunc i32 %add to i16 - %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv - store i16 %conv1, 
i16* %arrayidx3 + %arrayidx3 = getelementptr inbounds i16, ptr %q, i64 %indvars.iv + store i16 %conv1, ptr %arrayidx3 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -119,7 +119,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: load <8 x i16> ; CHECK: add nsw <8 x i32> ; CHECK: store <8 x i32> -define void @add_d(i16* noalias nocapture readonly %p, i32* noalias nocapture %q, i32 %len) #0 { +define void @add_d(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { entry: %cmp7 = icmp sgt i32 %len, 0 br i1 %cmp7, label %for.body, label %for.cond.cleanup @@ -129,12 +129,12 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv - %0 = load i16, i16* %arrayidx + %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv + %0 = load i16, ptr %arrayidx %conv = sext i16 %0 to i32 %add = add nsw i32 %conv, 2 - %arrayidx2 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv - store i32 %add, i32* %arrayidx2 + %arrayidx2 = getelementptr inbounds i32, ptr %q, i64 %indvars.iv + store i32 %add, ptr %arrayidx2 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -151,7 +151,7 @@ for.body: ; preds = %entry, %for.body ; CHECK: xor <16 x i8> ; CHECK: mul <16 x i8> ; CHECK: store <16 x i8> -define void @add_e(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 { +define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 { entry: %cmp.32 = icmp sgt i32 %len, 0 br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup @@ -166,8 +166,8 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %for.body, %for.body.lr.ph %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv - %0 = load i8, i8* %arrayidx + %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv + %0 = load i8, ptr %arrayidx %conv = zext i8 %0 to i32 %add = shl i32 %conv, 4 %conv2 = add nuw nsw i32 %add, 32 @@ -178,8 +178,8 @@ for.body: ; preds = %for.body, %for.body %conv17 = xor i32 %mul.masked, %conv11 %mul18 = mul nuw nsw i32 %conv17, %and %conv19 = trunc i32 %mul18 to i8 - %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv - store i8 %conv19, i8* %arrayidx21 + %arrayidx21 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv + store i8 %conv19, ptr %arrayidx21 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -197,7 +197,7 @@ for.body: ; preds = %for.body, %for.body ; CHECK: xor <8 x i8> ; CHECK: mul <8 x i8> ; CHECK: store <8 x i8> -define void @add_f(i16* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 { +define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 { entry: %cmp.32 = icmp sgt i32 %len, 0 br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup @@ -212,8 +212,8 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %for.body, %for.body.lr.ph %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] - 
%arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv - %0 = load i16, i16* %arrayidx + %arrayidx = getelementptr inbounds i16, ptr %p, i64 %indvars.iv + %0 = load i16, ptr %arrayidx %conv = sext i16 %0 to i32 %add = shl i32 %conv, 4 %conv2 = add nsw i32 %add, 32 @@ -225,8 +225,8 @@ for.body: ; preds = %for.body, %for.body %conv17 = xor i32 %mul.masked, %conv11 %mul18 = mul nuw nsw i32 %conv17, %and %conv19 = trunc i32 %mul18 to i8 - %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv - store i8 %conv19, i8* %arrayidx21 + %arrayidx21 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv + store i8 %conv19, ptr %arrayidx21 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -234,11 +234,11 @@ for.body: ; preds = %for.body, %for.body } ; CHECK-LABEL: @add_phifail( -; CHECK: load <16 x i8>, <16 x i8>* +; CHECK: load <16 x i8>, ptr ; CHECK: add nuw nsw <16 x i32> ; CHECK: store <16 x i8> ; Function Attrs: nounwind -define void @add_phifail(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 { +define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { entry: %cmp8 = icmp sgt i32 %len, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup @@ -249,13 +249,13 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv - %0 = load i8, i8* %arrayidx + %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv + %0 = load i8, ptr %arrayidx %conv = zext i8 %0 to i32 %add = add nuw nsw i32 %conv, 2 %conv1 = trunc i32 %add to i8 - %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv - store i8 %conv1, i8* %arrayidx3 + %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv + store i8 %conv1, ptr %arrayidx3 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len @@ -267,10 +267,10 @@ for.body: ; preds = %entry, %for.body ; even when %len exactly divides VF (since we extract from the second last index ; and pass this to the for.cond.cleanup block). 
Vectorized loop returns ; the correct value a_phi = p[len -2] -define i8 @add_phifail2(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 { +define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 { ; CHECK-LABEL: @add_phifail2( ; CHECK: vector.body: -; CHECK: %wide.load = load <16 x i8>, <16 x i8>* +; CHECK: %wide.load = load <16 x i8>, ptr ; CHECK: %[[L1:.+]] = zext <16 x i8> %wide.load to <16 x i32> ; CHECK: add nuw nsw <16 x i32> ; CHECK: store <16 x i8> @@ -293,13 +293,13 @@ for.cond.cleanup: ; preds = %for.body, %entry for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv - %0 = load i8, i8* %arrayidx + %arrayidx = getelementptr inbounds i8, ptr %p, i64 %indvars.iv + %0 = load i8, ptr %arrayidx %conv = zext i8 %0 to i32 %add = add nuw nsw i32 %conv, 2 %conv1 = trunc i32 %add to i8 - %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv - store i8 %conv1, i8* %arrayidx3 + %arrayidx3 = getelementptr inbounds i8, ptr %q, i64 %indvars.iv + store i8 %conv1, ptr %arrayidx3 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %len diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loopvectorize_pr33804_double.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loopvectorize_pr33804_double.ll index b1f0b27..263da07 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loopvectorize_pr33804_double.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loopvectorize_pr33804_double.ll @@ -10,22 +10,21 @@ source_filename = "bugpoint-output-26dbd81.bc" target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" -%struct.CvNode1D = type { double, %struct.CvNode1D* } +%struct.CvNode1D = type { double, ptr } ; CHECK-LABEL: @cvCalcEMD2 ; CHECK: vector.body -; CHECK: store <{{[0-9]+}} x %struct.CvNode1D*> -define void @cvCalcEMD2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK: store <{{[0-9]+}} x ptr> +define void @cvCalcEMD2() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { entry: br label %for.body14.i.i for.body14.i.i: ; preds = %for.body14.i.i, %entry %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ] - %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i - %val.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* %arrayidx15.i.i1427, i32 0, i32 0 - store double 0xC415AF1D80000000, double* %val.i.i, align 4 - %next19.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i, i32 1 - store %struct.CvNode1D* undef, %struct.CvNode1D** %next19.i.i, align 4 + %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, ptr undef, i32 %i.1424.i.i + store double 0xC415AF1D80000000, ptr %arrayidx15.i.i1427, align 4 + %next19.i.i = getelementptr inbounds %struct.CvNode1D, ptr undef, i32 %i.1424.i.i, i32 1 + store ptr undef, ptr %next19.i.i, align 4 %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1 %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0 br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i @@ -36,22 +35,22 @@ for.end22.i.i: ; preds = %for.body14.i.i ; This test checks when a pointer value is stored into a double type. 
-%struct.CvNode1D2 = type { %struct.CvNode1D2*, double } +%struct.CvNode1D2 = type { ptr, double } ; CHECK-LABEL: @cvCalcEMD2_2 ; CHECK: vector.body ; CHECK: store <{{[0-9]+}} x double> -define void @cvCalcEMD2_2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @cvCalcEMD2_2() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { entry: br label %for.body14.i.i for.body14.i.i: ; preds = %for.body14.i.i, %entry %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ] - %next19.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i, i32 0 - store %struct.CvNode1D2* undef, %struct.CvNode1D2** %next19.i.i, align 4 - %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i - %val.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* %arrayidx15.i.i1427, i32 0, i32 1 - store double 0xC415AF1D80000000, double* %val.i.i, align 4 + %next19.i.i = getelementptr inbounds %struct.CvNode1D2, ptr undef, i32 %i.1424.i.i, i32 0 + store ptr undef, ptr %next19.i.i, align 4 + %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, ptr undef, i32 %i.1424.i.i + %val.i.i = getelementptr inbounds %struct.CvNode1D2, ptr %arrayidx15.i.i1427, i32 0, i32 1 + store double 0xC415AF1D80000000, ptr %val.i.i, align 4 %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1 %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0 br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i @@ -65,17 +64,16 @@ for.end22.i.i: ; preds = %for.body14.i.i ; CHECK-LABEL: @cvCalcEMD3 ; CHECK: vector.body ; CHECK: inttoptr <{{[0-9]+}} x i64> -define void @cvCalcEMD3() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @cvCalcEMD3() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { entry: br label %for.body14.i.i for.body14.i.i: ; preds = %for.body14.i.i, %entry %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ] - %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i - %val.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* %arrayidx15.i.i1427, i32 0, i32 0 - %load_d = load double, double* %val.i.i, align 4 - %next19.i.i = getelementptr inbounds %struct.CvNode1D, %struct.CvNode1D* undef, i32 %i.1424.i.i, i32 1 - %load_p = load %struct.CvNode1D*, %struct.CvNode1D** %next19.i.i, align 4 + %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D, ptr undef, i32 %i.1424.i.i + %load_d = load double, ptr %arrayidx15.i.i1427, align 4 + %next19.i.i = getelementptr inbounds %struct.CvNode1D, ptr undef, i32 %i.1424.i.i, i32 1 + %load_p = load ptr, ptr %next19.i.i, align 4 %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1 %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0 br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i @@ -88,18 +86,18 @@ for.end22.i.i: ; preds = %for.body14.i.i ; CHECK-LABEL: @cvCalcEMD3_2 ; CHECK: vector.body -; CHECK: ptrtoint <{{[0-9]+}} x %struct.CvNode1D2*> -define void @cvCalcEMD3_2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK: ptrtoint <{{[0-9]+}} x ptr> +define void @cvCalcEMD3_2() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { entry: br label %for.body14.i.i for.body14.i.i: ; preds = %for.body14.i.i, %entry %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %entry ] - %next19.i.i = getelementptr inbounds %struct.CvNode1D2, 
%struct.CvNode1D2* undef, i32 %i.1424.i.i, i32 0 - %load_p = load %struct.CvNode1D2*, %struct.CvNode1D2** %next19.i.i, align 4 - %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* undef, i32 %i.1424.i.i - %val.i.i = getelementptr inbounds %struct.CvNode1D2, %struct.CvNode1D2* %arrayidx15.i.i1427, i32 0, i32 1 - %load_d = load double, double* %val.i.i, align 4 + %next19.i.i = getelementptr inbounds %struct.CvNode1D2, ptr undef, i32 %i.1424.i.i, i32 0 + %load_p = load ptr, ptr %next19.i.i, align 4 + %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D2, ptr undef, i32 %i.1424.i.i + %val.i.i = getelementptr inbounds %struct.CvNode1D2, ptr %arrayidx15.i.i1427, i32 0, i32 1 + %load_d = load double, ptr %val.i.i, align 4 %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1 %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0 br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index eb96768..d1e9f25 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -7,7 +7,7 @@ target triple = "aarch64-unknown-linux-gnu" ; A call whose argument must be widened. We check that tail folding uses the ; primary mask, and that without tail folding we synthesize an all-true mask. -define void @test_widen(i64* noalias %a, i64* readnone %b) #4 { +define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-LABEL: @test_widen( ; TFNONE-NEXT: entry: ; TFNONE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -15,18 +15,16 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 { ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TFNONE-NEXT: [[TMP0:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]] -; TFNONE-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>* -; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4 +; TFNONE-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4 ; TFNONE-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0 ; TFNONE-NEXT: [[TMP3:%.*]] = call i64 @foo(i64 [[TMP2]]) #[[ATTR2:[0-9]+]] ; TFNONE-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1 ; TFNONE-NEXT: [[TMP5:%.*]] = call i64 @foo(i64 [[TMP4]]) #[[ATTR2]] ; TFNONE-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0 ; TFNONE-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[TMP5]], i32 1 -; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] -; TFNONE-NEXT: [[TMP9:%.*]] = bitcast i64* [[TMP8]] to <2 x i64>* -; TFNONE-NEXT: store <2 x i64> [[TMP7]], <2 x i64>* [[TMP9]], align 4 +; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; TFNONE-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP8]], align 4 ; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; TFNONE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TFNONE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -37,11 +35,11 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 { ; TFNONE-NEXT: br label [[FOR_BODY:%.*]] ; TFNONE: for.body: ; TFNONE-NEXT: 
[[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4 +; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] +; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4 ; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR2]] -; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4 +; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] @@ -53,11 +51,11 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 { ; TFALWAYS-NEXT: br label [[FOR_BODY:%.*]] ; TFALWAYS: for.body: ; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4 +; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4 ; TFALWAYS-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]] -; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFALWAYS-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4 +; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -71,18 +69,16 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]] ; TFFALLBACK: vector.body: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[TMP0:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>* -; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 4 +; TFFALLBACK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4 ; TFFALLBACK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0 ; TFFALLBACK-NEXT: [[TMP3:%.*]] = call i64 @foo(i64 [[TMP2]]) #[[ATTR2:[0-9]+]] ; TFFALLBACK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1 ; TFFALLBACK-NEXT: [[TMP5:%.*]] = call i64 @foo(i64 [[TMP4]]) #[[ATTR2]] ; TFFALLBACK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0 ; TFFALLBACK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[TMP5]], i32 1 -; TFFALLBACK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[TMP9:%.*]] = bitcast i64* [[TMP8]] to <2 x i64>* -; 
TFFALLBACK-NEXT: store <2 x i64> [[TMP7]], <2 x i64>* [[TMP9]], align 4 +; TFFALLBACK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; TFFALLBACK-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP8]], align 4 ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; TFFALLBACK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TFFALLBACK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -93,11 +89,11 @@ define void @test_widen(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]] ; TFFALLBACK: for.body: ; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4 +; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4 ; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR2]] -; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4 +; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] @@ -109,11 +105,11 @@ entry: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gep = getelementptr i64, i64* %b, i64 %indvars.iv - %load = load i64, i64* %gep + %gep = getelementptr i64, ptr %b, i64 %indvars.iv + %load = load i64, ptr %gep %call = call i64 @foo(i64 %load) #1 - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv - store i64 %call, i64* %arrayidx + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + store i64 %call, ptr %arrayidx %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1024 br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -123,14 +119,14 @@ for.cond.cleanup: } ; Check that a simple conditional call can be vectorized. 
-define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 { +define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-LABEL: @test_if_then( ; TFNONE-NEXT: entry: ; TFNONE-NEXT: br label [[FOR_BODY:%.*]] ; TFNONE: for.body: ; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFNONE-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50 ; TFNONE-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]] ; TFNONE: if.then: @@ -138,8 +134,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 { ; TFNONE-NEXT: br label [[IF_END]] ; TFNONE: if.end: ; TFNONE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] -; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: store i64 [[TMP2]], i64* [[ARRAYIDX1]], align 8 +; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFNONE-NEXT: store i64 [[TMP2]], ptr [[ARRAYIDX1]], align 8 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -151,8 +147,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 { ; TFALWAYS-NEXT: br label [[FOR_BODY:%.*]] ; TFALWAYS: for.body: ; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFALWAYS-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; TFALWAYS-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50 ; TFALWAYS-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]] ; TFALWAYS: if.then: @@ -160,8 +156,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 { ; TFALWAYS-NEXT: br label [[IF_END]] ; TFALWAYS: if.end: ; TFALWAYS-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] -; TFALWAYS-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFALWAYS-NEXT: store i64 [[TMP2]], i64* [[ARRAYIDX1]], align 8 +; TFALWAYS-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: store i64 [[TMP2]], ptr [[ARRAYIDX1]], align 8 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -173,8 +169,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]] ; TFFALLBACK: for.body: ; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: 
[[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; TFFALLBACK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50 ; TFFALLBACK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]] ; TFFALLBACK: if.then: @@ -182,8 +178,8 @@ define void @test_if_then(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[IF_END]] ; TFFALLBACK: if.end: ; TFFALLBACK-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] -; TFFALLBACK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: store i64 [[TMP2]], i64* [[ARRAYIDX1]], align 8 +; TFFALLBACK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: store i64 [[TMP2]], ptr [[ARRAYIDX1]], align 8 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -195,8 +191,8 @@ entry: for.body: %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + %0 = load i64, ptr %arrayidx, align 8 %cmp = icmp ugt i64 %0, 50 br i1 %cmp, label %if.then, label %if.end @@ -206,8 +202,8 @@ if.then: if.end: %2 = phi i64 [%1, %if.then], [0, %for.body] - %arrayidx1 = getelementptr inbounds i64, i64* %b, i64 %indvars.iv - store i64 %2, i64* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %indvars.iv + store i64 %2, ptr %arrayidx1, align 8 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1024 br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -220,14 +216,14 @@ for.cond.cleanup: ; calls inside the conditional blocks. Although one of the calls has a ; uniform parameter and the metadata lists a uniform variant, right now ; we just see a splat of the parameter instead. More work needed. 
-define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 { +define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-LABEL: @test_widen_if_then_else( ; TFNONE-NEXT: entry: ; TFNONE-NEXT: br label [[FOR_BODY:%.*]] ; TFNONE: for.body: ; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFNONE-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50 ; TFNONE-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; TFNONE: if.then: @@ -238,8 +234,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 { ; TFNONE-NEXT: br label [[IF_END]] ; TFNONE: if.end: ; TFNONE-NEXT: [[TMP3:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ] -; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFNONE-NEXT: store i64 [[TMP3]], i64* [[ARRAYIDX1]], align 8 +; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFNONE-NEXT: store i64 [[TMP3]], ptr [[ARRAYIDX1]], align 8 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -251,8 +247,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 { ; TFALWAYS-NEXT: br label [[FOR_BODY:%.*]] ; TFALWAYS: for.body: ; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFALWAYS-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; TFALWAYS-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50 ; TFALWAYS-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; TFALWAYS: if.then: @@ -263,8 +259,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 { ; TFALWAYS-NEXT: br label [[IF_END]] ; TFALWAYS: if.end: ; TFALWAYS-NEXT: [[TMP3:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ] -; TFALWAYS-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFALWAYS-NEXT: store i64 [[TMP3]], i64* [[ARRAYIDX1]], align 8 +; TFALWAYS-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: store i64 [[TMP3]], ptr [[ARRAYIDX1]], align 8 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -276,8 +272,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]] ; TFFALLBACK: for.body: ; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; TFFALLBACK-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; TFFALLBACK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 50 ; TFFALLBACK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; TFFALLBACK: if.then: @@ -288,8 +284,8 @@ define void @test_widen_if_then_else(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[IF_END]] ; TFFALLBACK: if.end: ; TFFALLBACK-NEXT: [[TMP3:%.*]] = phi i64 [ [[TMP1]], [[IF_THEN]] ], [ [[TMP2]], [[IF_ELSE]] ] -; TFFALLBACK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: store i64 [[TMP3]], i64* [[ARRAYIDX1]], align 8 +; TFFALLBACK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: store i64 [[TMP3]], ptr [[ARRAYIDX1]], align 8 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -301,8 +297,8 @@ entry: for.body: %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + %0 = load i64, ptr %arrayidx, align 8 %cmp = icmp ugt i64 %0, 50 br i1 %cmp, label %if.then, label %if.else @@ -316,8 +312,8 @@ if.else: if.end: %3 = phi i64 [%1, %if.then], [%2, %if.else] - %arrayidx1 = getelementptr inbounds i64, i64* %b, i64 %indvars.iv - store i64 %3, i64* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %indvars.iv + store i64 %3, ptr %arrayidx1, align 8 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1024 br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -329,7 +325,7 @@ for.cond.cleanup: ; A call whose argument must be widened, where the vector variant does not have ; a mask. Forcing tail folding results in no vectorized call, whereas an ; unpredicated body with scalar tail can use the unmasked variant. 
-define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
+define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-LABEL: @test_widen_nomask(
; TFNONE-NEXT: entry:
; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -344,13 +340,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]]
; TFNONE: vector.body:
; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; TFNONE-NEXT: [[TMP4:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <vscale x 2 x i64>*
-; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP5]], align 4
+; TFNONE-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP4]], align 4
; TFNONE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
-; TFNONE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <vscale x 2 x i64>*
-; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64>* [[TMP8]], align 4
+; TFNONE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], align 4
; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; TFNONE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
@@ -364,11 +358,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
; TFNONE-NEXT: br label [[FOR_BODY:%.*]]
; TFNONE: for.body:
; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR4:[0-9]+]]
-; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -380,11 +374,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
; TFALWAYS-NEXT: br label [[FOR_BODY:%.*]]
; TFALWAYS: for.body:
; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
; TFALWAYS-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR3:[0-9]+]]
-; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
@@ -405,13 +399,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]]
; TFFALLBACK: vector.body:
; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; TFFALLBACK-NEXT: [[TMP4:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]]
-; TFFALLBACK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <vscale x 2 x i64>*
-; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP5]], align 4
+; TFFALLBACK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
+; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP4]], align 4
; TFFALLBACK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
-; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; TFFALLBACK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <vscale x 2 x i64>*
-; TFFALLBACK-NEXT: store <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64>* [[TMP8]], align 4
+; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; TFFALLBACK-NEXT: store <vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], align 4
; TFFALLBACK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; TFFALLBACK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2
; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
@@ -425,11 +417,11 @@ define void @test_widen_nomask(i64* noalias %a, i64* readnone %b) #4 {
; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]]
; TFFALLBACK: for.body:
; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR4:[0-9]+]]
-; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]]
-; TFFALLBACK-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; TFFALLBACK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -441,11 +433,11 @@ entry:
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %gep = getelementptr i64, i64* %b, i64 %indvars.iv
- %load = load i64, i64* %gep
+ %gep = getelementptr i64, ptr %b, i64 %indvars.iv
+ %load = load i64, ptr %gep
%call = call i64 @foo(i64 %load) #2
- %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv
- store i64 %call, i64* %arrayidx
+ %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %call, ptr %arrayidx
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -457,7 +449,7 @@ for.cond.cleanup:
}
; If both masked and unmasked options are present, we expect to see tail folding
; use the masked version and unpredicated body with scalar tail use the unmasked
; version.
-define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
+define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-LABEL: @test_widen_optmask(
; TFNONE-NEXT: entry:
; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
@@ -472,13 +464,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]]
; TFNONE: vector.body:
; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; TFNONE-NEXT: [[TMP4:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <vscale x 2 x i64>*
-; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP5]], align 4
+; TFNONE-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP4]], align 4
; TFNONE-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
-; TFNONE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
-; TFNONE-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <vscale x 2 x i64>*
-; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64>* [[TMP8]], align 4
+; TFNONE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
+; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], align 4
; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; TFNONE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
@@ -492,11 +482,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
; TFNONE-NEXT: br label [[FOR_BODY:%.*]]
; TFNONE: for.body:
; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]]
-; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]]
-; TFNONE-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4
+; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
+; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4
; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -508,11 +498,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 {
; TFALWAYS-NEXT: br label [[FOR_BODY:%.*]]
; TFALWAYS: for.body:
; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDVARS_IV]]
-; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4
+; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
+; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4
; TFALWAYS-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR4:[0-9]+]]
-; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64
[[INDVARS_IV]] -; TFALWAYS-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4 +; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] +; TFALWAYS-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] @@ -533,13 +523,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]] ; TFFALLBACK: vector.body: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TFFALLBACK-NEXT: [[TMP4:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to * -; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 4 +; TFFALLBACK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFFALLBACK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 ; TFFALLBACK-NEXT: [[TMP6:%.*]] = call @foo_vector_nomask( [[WIDE_LOAD]]) -; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] -; TFFALLBACK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to * -; TFFALLBACK-NEXT: store [[TMP6]], * [[TMP8]], align 4 +; TFFALLBACK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] +; TFFALLBACK-NEXT: store [[TMP6]], ptr [[TMP7]], align 4 ; TFFALLBACK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; TFFALLBACK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] @@ -553,11 +541,11 @@ define void @test_widen_optmask(i64* noalias %a, i64* readnone %b) #4 { ; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]] ; TFFALLBACK: for.body: ; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[B]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, i64* [[GEP]], align 4 +; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4 ; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]] -; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[INDVARS_IV]] -; TFFALLBACK-NEXT: store i64 [[CALL]], i64* [[ARRAYIDX]], align 4 +; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; TFFALLBACK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 ; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -569,11 +557,11 @@ entry: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gep = getelementptr i64, i64* %b, i64 %indvars.iv - %load = load i64, i64* %gep + %gep = getelementptr i64, ptr %b, i64 %indvars.iv + %load = load i64, ptr %gep %call = call i64 @foo(i64 %load) #3 - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv - store i64 %call, i64* %arrayidx + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + store i64 %call, ptr %arrayidx %indvars.iv.next = add nuw nsw i64 
%indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1024 br i1 %exitcond, label %for.cond.cleanup, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll index 04c0f2d..6f6ce72 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll @@ -5,12 +5,12 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-COST: Checking a loop in 'fixed_width' -; CHECK-COST: Found an estimated cost of 11 for VF 2 For instruction: store i32 2, i32* %arrayidx1, align 4 -; CHECK-COST: Found an estimated cost of 25 for VF 4 For instruction: store i32 2, i32* %arrayidx1, align 4 +; CHECK-COST: Found an estimated cost of 11 for VF 2 For instruction: store i32 2, ptr %arrayidx1, align 4 +; CHECK-COST: Found an estimated cost of 25 for VF 4 For instruction: store i32 2, ptr %arrayidx1, align 4 ; CHECK-COST: Selecting VF: 1. ; We should decide this loop is not worth vectorising using fixed width vectors -define void @fixed_width(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) #0 { +define void @fixed_width(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 { ; CHECK-LABEL: @fixed_width( ; CHECK-NOT: vector.body entry: @@ -28,14 +28,14 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo for.body: ; preds = %for.body.preheader, %for.inc %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.07 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.07 + %0 = load i32, ptr %arrayidx, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body - %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %i.07 - store i32 2, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %i.07 + store i32 2, ptr %arrayidx1, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -46,12 +46,12 @@ for.inc: ; preds = %for.body, %if.then ; CHECK-COST: Checking a loop in 'scalable' -; CHECK-COST: Found an estimated cost of 1 for VF vscale x 4 For instruction: store i32 2, i32* %arrayidx1, align 4 +; CHECK-COST: Found an estimated cost of 1 for VF vscale x 4 For instruction: store i32 2, ptr %arrayidx1, align 4 -define void @scalable(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) #0 { +define void @scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 { ; CHECK-LABEL: @scalable( ; CHECK: vector.body -; CHECK: call void @llvm.masked.store.nxv4i32.p0nxv4i32 +; CHECK: call void @llvm.masked.store.nxv4i32.p0 entry: %cmp6 = icmp sgt i64 %n, 0 br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup @@ -67,14 +67,14 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo for.body: ; preds = %for.body.preheader, %for.inc %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.07 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.07 + %0 = load i32, ptr %arrayidx, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body - %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %i.07 - store i32 2, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %i.07 + store i32 2, ptr 
%arrayidx1, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll b/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll index 85f9e6d..3550d40 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll @@ -32,21 +32,21 @@ target triple = "aarch64--linux-gnu" ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> -define void @max_vf(%struct.pair* noalias nocapture %p) { +define void @max_vf(ptr noalias nocapture %p) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %0 = add nuw nsw i64 %i, 2 - %p_i.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 0 - %p_i_plus_2.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 0 - %1 = load i32, i32* %p_i.x, align 4 - store i32 %1, i32* %p_i_plus_2.x, align 4 - %p_i.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 1 - %p_i_plus_2.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 1 - %2 = load i32, i32* %p_i.y, align 4 - store i32 %2, i32* %p_i_plus_2.y, align 4 + %p_i.x = getelementptr inbounds %struct.pair, ptr %p, i64 %i, i32 0 + %p_i_plus_2.x = getelementptr inbounds %struct.pair, ptr %p, i64 %0, i32 0 + %1 = load i32, ptr %p_i.x, align 4 + store i32 %1, ptr %p_i_plus_2.x, align 4 + %p_i.y = getelementptr inbounds %struct.pair, ptr %p, i64 %i, i32 1 + %p_i_plus_2.y = getelementptr inbounds %struct.pair, ptr %p, i64 %0, i32 1 + %2 = load i32, ptr %p_i.y, align 4 + store i32 %2, ptr %p_i_plus_2.y, align 4 %i.next = add nuw nsw i64 %i, 1 %cond = icmp eq i64 %i.next, 1000 br i1 %cond, label %for.exit, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll index 03cb59b..785241d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll @@ -9,14 +9,14 @@ target triple = "aarch64--linux-gnu" ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2 ; CHECK: LV: Not considering vector loop of width 2 because it will not generate any vector instructions ; -define void @all_scalar(i64* %a, i64 %n) { +define void @all_scalar(ptr %a, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %tmp0 = getelementptr i64, i64* %a, i64 %i - store i64 0, i64* %tmp0, align 1 + %tmp0 = getelementptr i64, ptr %a, i64 %i + store i64 0, ptr %tmp0, align 1 %i.next = add nuw nsw i64 %i, 2 %cond = icmp eq i64 %i.next, %n br i1 %cond, label %for.end, label %for.body @@ -30,15 +30,15 @@ for.end: ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %i.next = zext i32 %j.next to i64 ; CHECK: LV: Not considering vector loop of width 8 because it will not generate any vector instructions %struct.a = type { i32, i8 } -define void @PR33193(%struct.a* %a, i64 %n) { +define void @PR33193(ptr %a, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %j = phi i32 [ 0, %entry ], [ %j.next, %for.body ] - %tmp0 = getelementptr inbounds %struct.a, %struct.a* %a, i64 %i, i32 1 - store i8 0, i8* %tmp0, align 4 + %tmp0 = getelementptr inbounds %struct.a, ptr %a, i64 %i, i32 1 + store i8 0, ptr %tmp0, align 4 %j.next = add i32 %j, 
1 %i.next = zext i32 %j.next to i64 %cond = icmp ugt i64 %n, %i.next diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll index f6228fc..80410a4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll @@ -24,17 +24,17 @@ ; CHECK-LABEL: vector.body: ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] ; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] -; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, <4 x i64> %[[VecInd]] +; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]] ; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[VecIndTr]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> ) +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> ) ; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32> ; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]] ; CHECK: br label %[[InnerLoop:.+]] ; CHECK: [[InnerLoop]]: ; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] -; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[StoreVal]], <4 x i32*> %[[AAddr2]], i32 4, <4 x i1> %[[InnerPhi]], <4 x i64> %[[VecInd]] +; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> %[[InnerPhi]], ; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], ; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0 @@ -59,17 +59,17 @@ entry: for.body: ; preds = %for.inc8, %entry %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ] - %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @arr2, i64 0, i64 %indvars.iv21 + %arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i64 %indvars.iv21 %0 = trunc i64 %indvars.iv21 to i32 - store i32 %0, i32* %arrayidx, align 4 + store i32 %0, ptr %arrayidx, align 4 %1 = trunc i64 %indvars.iv21 to i32 %add = add nsw i32 %1, %n br label %for.body3 for.body3: ; preds = %for.body3, %for.body %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ] - %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21 - store i32 %add, i32* %arrayidx7, align 4 + %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21 + store i32 %add, ptr %arrayidx7, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 8 br i1 %exitcond, label %for.inc8, label %for.body3 @@ -91,15 +91,15 @@ for.end10: ; preds = %for.inc8 ; CHECK-LABEL: vector.body: ; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ] ; CHECK: %[[VecInd:.*]] = phi <2 x i64> [ , %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ] -; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i64], [8 x i64]* @arrX, i64 0, <2 x i64> %[[VecInd]] -; 
CHECK: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> %[[VecInd]], <2 x i64*> %[[AAddr]], i32 4, <2 x i1> ) +; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i64], ptr @arrX, i64 0, <2 x i64> %[[VecInd]] +; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[VecInd]], <2 x ptr> %[[AAddr]], i32 4, <2 x i1> ) ; CHECK: %[[StoreVal:.*]] = add nsw <2 x i64> %[[VecInd]], %[[Splat]] ; CHECK: br label %[[InnerLoop:.+]] ; CHECK: [[InnerLoop]]: ; CHECK: %[[InnerPhi:.*]] = phi <2 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ] -; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* @arrY, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]] -; CHECK: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> %[[StoreVal]], <2 x i64*> %[[AAddr2]], i32 4, <2 x i1> +; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i64]], ptr @arrY, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]] +; CHECK: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %[[StoreVal]], <2 x ptr> %[[AAddr2]], i32 4, <2 x i1> ; CHECK: %[[InnerPhiNext]] = add nuw nsw <2 x i64> %[[InnerPhi]], ; CHECK: %[[VecCond:.*]] = icmp eq <2 x i64> %[[InnerPhiNext]], ; CHECK: %[[InnerCond:.*]] = extractelement <2 x i1> %[[VecCond]], i32 0 @@ -117,15 +117,15 @@ entry: for.body: ; preds = %for.inc8, %entry %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ] - %arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* @arrX, i64 0, i64 %indvars.iv21 - store i64 %indvars.iv21, i64* %arrayidx, align 4 + %arrayidx = getelementptr inbounds [8 x i64], ptr @arrX, i64 0, i64 %indvars.iv21 + store i64 %indvars.iv21, ptr %arrayidx, align 4 %add = add nsw i64 %indvars.iv21, %n br label %for.body3 for.body3: ; preds = %for.body3, %for.body %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ] - %arrayidx7 = getelementptr inbounds [8 x [8 x i64]], [8 x [8 x i64]]* @arrY, i64 0, i64 %indvars.iv, i64 %indvars.iv21 - store i64 %add, i64* %arrayidx7, align 4 + %arrayidx7 = getelementptr inbounds [8 x [8 x i64]], ptr @arrY, i64 0, i64 %indvars.iv, i64 %indvars.iv21 + store i64 %add, ptr %arrayidx7, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 8 br i1 %exitcond, label %for.inc8, label %for.body3 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll index f86540a..43d0e8f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll @@ -7,32 +7,32 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128-p:16:16-p4:32:16" ; Check that all the loads are scalarized -; CHECK: load i16, i16* -; CHECK: load i16, i16* -; CHECK: load i16, i16 addrspace(4)* -; CHECK: load i16, i16 addrspace(4)* +; CHECK: load i16, ptr +; CHECK: load i16, ptr +; CHECK: load i16, ptr addrspace(4) +; CHECK: load i16, ptr addrspace(4) ; CHECK: store <2 x i16> %rec1445 = type { i16, i16, i16, i16, i16 } -define void @foo(%rec1445* %a, %rec1445 addrspace(4)* %b, i16* noalias %dst) { +define void @foo(ptr %a, ptr addrspace(4) %b, ptr noalias %dst) { bb1: br label %bb4 bb4: %tmp1 = phi i16 [ 0, %bb1 ], [ %_tmp1013, %bb4 ] - %tmp2 = phi %rec1445* [ %a, %bb1 ], [ %_tmp1015, %bb4 ] - %tmp3 = phi %rec1445 addrspace(4)* [ %b, %bb1 ], [ %_tmp1017, %bb4 ] - %0 = getelementptr %rec1445, %rec1445* %tmp2, i16 0, i32 1 - %_tmp987 = load i16, i16* %0, align 1 - %1 = getelementptr %rec1445, 
%rec1445 addrspace(4)* %tmp3, i32 0, i32 1 - %_tmp993 = load i16, i16 addrspace(4)* %1, align 1 + %tmp2 = phi ptr [ %a, %bb1 ], [ %_tmp1015, %bb4 ] + %tmp3 = phi ptr addrspace(4) [ %b, %bb1 ], [ %_tmp1017, %bb4 ] + %0 = getelementptr %rec1445, ptr %tmp2, i16 0, i32 1 + %_tmp987 = load i16, ptr %0, align 1 + %1 = getelementptr %rec1445, ptr addrspace(4) %tmp3, i32 0, i32 1 + %_tmp993 = load i16, ptr addrspace(4) %1, align 1 %add = add i16 %_tmp987, %_tmp993 - %dst.gep = getelementptr inbounds i16, i16* %dst, i16 %tmp1 - store i16 %add, i16* %dst.gep + %dst.gep = getelementptr inbounds i16, ptr %dst, i16 %tmp1 + store i16 %add, ptr %dst.gep %_tmp1013 = add i16 %tmp1, 1 - %_tmp1015 = getelementptr %rec1445, %rec1445* %tmp2, i16 1 - %_tmp1017 = getelementptr %rec1445, %rec1445 addrspace(4)* %tmp3, i32 1 + %_tmp1015 = getelementptr %rec1445, ptr %tmp2, i16 1 + %_tmp1017 = getelementptr %rec1445, ptr addrspace(4) %tmp3, i32 1 %_tmp1019 = icmp ult i16 %_tmp1013, 24 br i1 %_tmp1019, label %bb4, label %bb16 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll index bf93ec8..20b5364 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr33053.ll @@ -3,19 +3,19 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" @b = common local_unnamed_addr global i32 0, align 4 -@a = common local_unnamed_addr global i16* null, align 8 +@a = common local_unnamed_addr global ptr null, align 8 define i32 @fn1() local_unnamed_addr #0 { ; We expect the backend to expand all reductions. ; CHECK: @llvm.vector.reduce entry: - %0 = load i32, i32* @b, align 4, !tbaa !1 + %0 = load i32, ptr @b, align 4, !tbaa !1 %cmp40 = icmp sgt i32 %0, 0 br i1 %cmp40, label %for.body.lr.ph, label %for.end for.body.lr.ph: ; preds = %entry - %1 = load i16*, i16** @a, align 8, !tbaa !5 - %2 = load i32, i32* @b, align 4, !tbaa !1 + %1 = load ptr, ptr @a, align 8, !tbaa !5 + %2 = load i32, ptr @b, align 4, !tbaa !1 %3 = sext i32 %2 to i64 br label %for.body @@ -23,8 +23,8 @@ for.body: ; preds = %for.body.lr.ph, %fo %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] %d.043 = phi i16 [ undef, %for.body.lr.ph ], [ %.sink28, %for.body ] %c.042 = phi i16 [ undef, %for.body.lr.ph ], [ %c.0., %for.body ] - %arrayidx = getelementptr inbounds i16, i16* %1, i64 %indvars.iv - %4 = load i16, i16* %arrayidx, align 2, !tbaa !7 + %arrayidx = getelementptr inbounds i16, ptr %1, i64 %indvars.iv + %4 = load i16, ptr %arrayidx, align 2, !tbaa !7 %cmp2 = icmp sgt i16 %c.042, %4 %c.0. 
= select i1 %cmp2, i16 %c.042, i16 %4 %cmp13 = icmp slt i16 %d.043, %4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll index e4e415a..44820e0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll @@ -14,7 +14,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @_Z1dv() local_unnamed_addr #0 { ; CHECK-LABEL: @_Z1dv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @"_ZN3$_01aEv"(%struct.anon* nonnull @b) +; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @"_ZN3$_01aEv"(ptr nonnull @b) ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[F_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD5:%.*]], [[FOR_COND_CLEANUP:%.*]] ] @@ -36,16 +36,16 @@ define void @_Z1dv() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CONV]], [[TMP1]] ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[ADD]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i8], [6 x i8]* @c, i64 0, i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i8 [[TMP2]], i8* [[ARRAYIDX3]], align 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i8], ptr @c, i64 0, i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[CALL]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i8 [[TMP2]], ptr [[ARRAYIDX3]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; entry: - %call = tail call i8* @"_ZN3$_01aEv"(%struct.anon* nonnull @b) #2 + %call = tail call ptr @"_ZN3$_01aEv"(ptr nonnull @b) #2 br label %for.cond for.cond: ; preds = %for.cond.cleanup, %entry @@ -72,13 +72,13 @@ for.body: ; preds = %for.body, %for.body %1 = trunc i64 %indvars.iv to i32 %add = add i32 %conv, %1 %idxprom = zext i32 %add to i64 - %arrayidx = getelementptr inbounds [6 x i8], [6 x i8]* @c, i64 0, i64 %idxprom - %2 = load i8, i8* %arrayidx, align 1 - %arrayidx3 = getelementptr inbounds i8, i8* %call, i64 %indvars.iv - store i8 %2, i8* %arrayidx3, align 1 + %arrayidx = getelementptr inbounds [6 x i8], ptr @c, i64 0, i64 %idxprom + %2 = load i8, ptr %arrayidx, align 1 + %arrayidx3 = getelementptr inbounds i8, ptr %call, i64 %indvars.iv + store i8 %2, ptr %arrayidx3, align 1 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 4 br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body } -declare i8* @"_ZN3$_01aEv"(%struct.anon*) local_unnamed_addr #1 +declare ptr @"_ZN3$_01aEv"(ptr) local_unnamed_addr #1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll index 2a974e5..ae5e7d0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr46950-load-cast-context-crash.ll @@ -3,9 +3,9 @@ ; CHECK-LABEL: define void @test( ; CHECK: vector.body -define void @test(i64* %dst, i32* %src) { +define void @test(ptr %dst, ptr 
%src) { entry: - %l = load i32, i32* %src + %l = load i32, ptr %src br label %loop.ph loop.ph: @@ -14,8 +14,8 @@ loop.ph: loop: %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop ] %l.cast = sext i32 %l to i64 - %dst.idx = getelementptr i64, i64* %dst, i64 %iv - store i64 %l.cast, i64* %dst.idx + %dst.idx = getelementptr i64, ptr %dst, i64 %iv + store i64 %l.cast, ptr %dst.idx %iv.next = add nuw nsw i64 %iv, 1 %cmp9.us = icmp ult i64 %iv.next, 20 br i1 %cmp9.us, label %loop, label %exit diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll index 32c0076..ff544df 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/predication_costs.ll @@ -21,17 +21,17 @@ target triple = "aarch64--linux-gnu" ; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp2, %tmp3 ; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3 ; -define i32 @predicated_udiv(i32* %a, i32* %b, i1 %c, i64 %n) { +define i32 @predicated_udiv(ptr %a, ptr %b, i1 %c, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] %r = phi i32 [ 0, %entry ], [ %tmp6, %for.inc ] - %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i - %tmp1 = getelementptr inbounds i32, i32* %b, i64 %i - %tmp2 = load i32, i32* %tmp0, align 4 - %tmp3 = load i32, i32* %tmp1, align 4 + %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i + %tmp1 = getelementptr inbounds i32, ptr %b, i64 %i + %tmp2 = load i32, ptr %tmp0, align 4 + %tmp3 = load i32, ptr %tmp1, align 4 br i1 %c, label %if.then, label %for.inc if.then: @@ -59,22 +59,22 @@ for.end: ; Cost of store: ; (store(4) + extractelement(3)) / 2 = 3 ; -; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4 -; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4 +; CHECK: Scalarizing and predicating: store i32 %tmp2, ptr %tmp0, align 4 +; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4 ; -define void @predicated_store(i32* %a, i1 %c, i32 %x, i64 %n) { +define void @predicated_store(ptr %a, i1 %c, i32 %x, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] - %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i - %tmp1 = load i32, i32* %tmp0, align 4 + %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i + %tmp1 = load i32, ptr %tmp0, align 4 %tmp2 = add nsw i32 %tmp1, %x br i1 %c, label %if.then, label %for.inc if.then: - store i32 %tmp2, i32* %tmp0, align 4 + store i32 %tmp2, ptr %tmp0, align 4 br label %for.inc for.inc: @@ -90,31 +90,31 @@ for.end: ; ; Same as predicate_store except we use a pointer PHI to maintain the address ; -; CHECK: Found scalar instruction: %addr = phi i32* [ %a, %entry ], [ %addr.next, %for.inc ] -; CHECK: Found scalar instruction: %addr.next = getelementptr inbounds i32, i32* %addr, i64 1 -; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %addr, align 4 -; CHECK: Found an estimated cost of 0 for VF 2 For instruction: %addr = phi i32* [ %a, %entry ], [ %addr.next, %for.inc ] -; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, i32* %addr, align 4 +; CHECK: Found scalar instruction: %addr = phi ptr [ %a, %entry ], [ %addr.next, %for.inc ] +; CHECK: Found scalar instruction: %addr.next = getelementptr inbounds i32, ptr %addr, i64 1 +; CHECK: Scalarizing and 
predicating: store i32 %tmp2, ptr %addr, align 4 +; CHECK: Found an estimated cost of 0 for VF 2 For instruction: %addr = phi ptr [ %a, %entry ], [ %addr.next, %for.inc ] +; CHECK: Found an estimated cost of 3 for VF 2 For instruction: store i32 %tmp2, ptr %addr, align 4 ; -define void @predicated_store_phi(i32* %a, i1 %c, i32 %x, i64 %n) { +define void @predicated_store_phi(ptr %a, i1 %c, i32 %x, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] - %addr = phi i32 * [ %a, %entry ], [ %addr.next, %for.inc ] - %tmp1 = load i32, i32* %addr, align 4 + %addr = phi ptr [ %a, %entry ], [ %addr.next, %for.inc ] + %tmp1 = load i32, ptr %addr, align 4 %tmp2 = add nsw i32 %tmp1, %x br i1 %c, label %if.then, label %for.inc if.then: - store i32 %tmp2, i32* %addr, align 4 + store i32 %tmp2, ptr %addr, align 4 br label %for.inc for.inc: %i.next = add nuw nsw i64 %i, 1 %cond = icmp slt i64 %i.next, %n - %addr.next = getelementptr inbounds i32, i32* %addr, i64 1 + %addr.next = getelementptr inbounds i32, ptr %addr, i64 1 br i1 %cond, label %for.body, label %for.end for.end: @@ -138,15 +138,15 @@ for.end: ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp3 = add nsw i32 %tmp2, %x ; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %tmp4 = udiv i32 %tmp2, %tmp3 ; -define i32 @predicated_udiv_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) { +define i32 @predicated_udiv_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] %r = phi i32 [ 0, %entry ], [ %tmp6, %for.inc ] - %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i - %tmp2 = load i32, i32* %tmp0, align 4 + %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i + %tmp2 = load i32, ptr %tmp0, align 4 br i1 %c, label %if.then, label %for.inc if.then: @@ -179,23 +179,23 @@ for.end: ; store(4) / 2 = 2 ; ; CHECK: Scalarizing: %tmp2 = add nsw i32 %tmp1, %x -; CHECK: Scalarizing and predicating: store i32 %tmp2, i32* %tmp0, align 4 +; CHECK: Scalarizing and predicating: store i32 %tmp2, ptr %tmp0, align 4 ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = add nsw i32 %tmp1, %x -; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp2, i32* %tmp0, align 4 +; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4 ; -define void @predicated_store_scalarized_operand(i32* %a, i1 %c, i32 %x, i64 %n) { +define void @predicated_store_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] - %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i - %tmp1 = load i32, i32* %tmp0, align 4 + %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i + %tmp1 = load i32, ptr %tmp0, align 4 br i1 %c, label %if.then, label %for.inc if.then: %tmp2 = add nsw i32 %tmp1, %x - store i32 %tmp2, i32* %tmp0, align 4 + store i32 %tmp2, ptr %tmp0, align 4 br label %for.inc for.inc: @@ -231,21 +231,21 @@ for.end: ; CHECK: Scalarizing and predicating: %tmp3 = sdiv i32 %tmp1, %tmp2 ; CHECK: Scalarizing and predicating: %tmp4 = udiv i32 %tmp3, %tmp2 ; CHECK: Scalarizing: %tmp5 = sub i32 %tmp4, %x -; CHECK: Scalarizing and predicating: store i32 %tmp5, i32* %tmp0, align 4 +; CHECK: Scalarizing and predicating: store i32 %tmp5, ptr %tmp0, align 4 ; CHECK: Found an estimated cost of 1 for VF 2 For instruction: %tmp2 = add i32 %tmp1, %x ; CHECK: Found an estimated cost of 5 for VF 2 
For instruction: %tmp3 = sdiv i32 %tmp1, %tmp2 ; CHECK: Found an estimated cost of 5 for VF 2 For instruction: %tmp4 = udiv i32 %tmp3, %tmp2 ; CHECK: Found an estimated cost of 2 for VF 2 For instruction: %tmp5 = sub i32 %tmp4, %x -; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp5, i32* %tmp0, align 4 +; CHECK: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp5, ptr %tmp0, align 4 ; -define void @predication_multi_context(i32* %a, i1 %c, i32 %x, i64 %n) { +define void @predication_multi_context(ptr %a, i1 %c, i32 %x, i64 %n) { entry: br label %for.body for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ] - %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i - %tmp1 = load i32, i32* %tmp0, align 4 + %tmp0 = getelementptr inbounds i32, ptr %a, i64 %i + %tmp1 = load i32, ptr %tmp0, align 4 br i1 %c, label %if.then, label %for.inc if.then: @@ -253,7 +253,7 @@ if.then: %tmp3 = sdiv i32 %tmp1, %tmp2 %tmp4 = udiv i32 %tmp3, %tmp2 %tmp5 = sub i32 %tmp4, %x - store i32 %tmp5, i32* %tmp0, align 4 + store i32 %tmp5, ptr %tmp0, align 4 br label %for.inc for.inc: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll index fea9172..a60577b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll @@ -23,7 +23,7 @@ target triple = "aarch64--linux-gnu" ; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> ; CHECK: zext i8 [[Rdx]] to i32 ; -define i8 @reduction_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) { +define i8 @reduction_i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) { entry: %cmp.12 = icmp sgt i32 %n, 0 br i1 %cmp.12, label %for.body.preheader, label %for.cond.cleanup @@ -43,11 +43,11 @@ for.cond.cleanup: for.body: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] %sum.013 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv - %0 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv + %0 = load i8, ptr %arrayidx, align 1 %conv = zext i8 %0 to i32 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv - %1 = load i8, i8* %arrayidx2, align 1 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv + %1 = load i8, ptr %arrayidx2, align 1 %conv3 = zext i8 %1 to i32 %conv4 = and i32 %sum.013, 255 %add = add nuw nsw i32 %conv, %conv4 @@ -78,7 +78,7 @@ for.body: ; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> ; CHECK: zext i16 [[Rdx]] to i32 ; -define i16 @reduction_i16_1(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %n) { +define i16 @reduction_i16_1(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) { entry: %cmp.16 = icmp sgt i32 %n, 0 br i1 %cmp.16, label %for.body.preheader, label %for.cond.cleanup @@ -98,11 +98,11 @@ for.cond.cleanup: for.body: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] %sum.017 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i16, i16* %a, i64 %indvars.iv - %0 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %a, i64 %indvars.iv + %0 = load i16, ptr %arrayidx, align 2 %conv.14 = zext i16 %0 to i32 - %arrayidx2 = getelementptr inbounds 
i16, i16* %b, i64 %indvars.iv - %1 = load i16, i16* %arrayidx2, align 2 + %arrayidx2 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv + %1 = load i16, ptr %arrayidx2, align 2 %conv3.15 = zext i16 %1 to i32 %conv4.13 = and i32 %sum.017, 65535 %add = add nuw nsw i32 %conv.14, %conv4.13 @@ -135,7 +135,7 @@ for.body: ; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> ; CHECK: zext i16 [[Rdx]] to i32 ; -define i16 @reduction_i16_2(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) { +define i16 @reduction_i16_2(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) { entry: %cmp.14 = icmp sgt i32 %n, 0 br i1 %cmp.14, label %for.body.preheader, label %for.cond.cleanup @@ -155,11 +155,11 @@ for.cond.cleanup: for.body: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] %sum.015 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv - %0 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv + %0 = load i8, ptr %arrayidx, align 1 %conv = zext i8 %0 to i32 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv - %1 = load i8, i8* %arrayidx2, align 1 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv + %1 = load i8, ptr %arrayidx2, align 1 %conv3 = zext i8 %1 to i32 %conv4.13 = and i32 %sum.015, 65535 %add = add nuw nsw i32 %conv, %conv4.13 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll index 71b7754..4dd6522 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll @@ -10,23 +10,23 @@ ; CHECK-LABEL: define {{.*}} @test_tc_too_small ; CHECK-NOT: vector.memcheck ; CHECK-NOT: vector.body -define void @test_tc_too_small(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2) { +define void @test_tc_too_small(ptr %ptr.1, ptr %ptr.2, ptr %ptr.3, ptr %ptr.4, i64 %off.1, i64 %off.2) { entry: br label %loop loop: ; preds = %bb54, %bb37 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %gep.1 = getelementptr inbounds i16, i16* %ptr.1, i64 %iv - %lv.1 = load i16, i16* %gep.1, align 2 + %gep.1 = getelementptr inbounds i16, ptr %ptr.1, i64 %iv + %lv.1 = load i16, ptr %gep.1, align 2 %ext.1 = sext i16 %lv.1 to i32 - %gep.2 = getelementptr inbounds i16, i16* %ptr.2, i64 %iv - %lv.2 = load i16, i16* %gep.2, align 2 + %gep.2 = getelementptr inbounds i16, ptr %ptr.2, i64 %iv + %lv.2 = load i16, ptr %gep.2, align 2 %ext.2 = sext i16 %lv.2 to i32 - %gep.off.1 = getelementptr inbounds i16, i16* %gep.2, i64 %off.1 - %lv.3 = load i16, i16* %gep.off.1, align 2 + %gep.off.1 = getelementptr inbounds i16, ptr %gep.2, i64 %off.1 + %lv.3 = load i16, ptr %gep.off.1, align 2 %ext.3 = sext i16 %lv.3 to i32 - %gep.off.2 = getelementptr inbounds i16, i16* %gep.2, i64 %off.2 - %lv.4 = load i16, i16* %gep.off.2, align 2 + %gep.off.2 = getelementptr inbounds i16, ptr %gep.2, i64 %off.2 + %lv.4 = load i16, ptr %gep.off.2, align 2 %ext.4 = sext i16 %lv.4 to i32 %tmp62 = mul nsw i32 %ext.2, 11 %tmp66 = mul nsw i32 %ext.3, -4 @@ -45,11 +45,11 @@ loop: ; preds = %bb54, %bb37 %tmp82 = sub nsw i32 %tmp81, %ext.1 %tmp83 = lshr i32 %tmp82, 1 %trunc.1 = trunc i32 %tmp75 to i16 - %gep.3 = getelementptr inbounds i16, i16* %ptr.3, i64 %iv - store i16 
%trunc.1, i16* %gep.3, align 2 + %gep.3 = getelementptr inbounds i16, ptr %ptr.3, i64 %iv + store i16 %trunc.1, ptr %gep.3, align 2 %trunc.2 = trunc i32 %tmp83 to i16 - %gep.4 = getelementptr inbounds i16, i16* %ptr.4, i64 %iv - store i16 %trunc.2, i16* %gep.4, align 2 + %gep.4 = getelementptr inbounds i16, ptr %ptr.4, i64 %iv + store i16 %trunc.2, ptr %gep.4, align 2 %iv.next = add nuw nsw i64 %iv, 1 %cmp = icmp ult i64 %iv, 10 br i1 %cmp, label %loop, label %exit @@ -65,23 +65,23 @@ exit: ; THRESHOLD-NOT: vector.memcheck ; THRESHOLD-NOT: vector.body ; -define void @test_tc_big_enough(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2) { +define void @test_tc_big_enough(ptr %ptr.1, ptr %ptr.2, ptr %ptr.3, ptr %ptr.4, i64 %off.1, i64 %off.2) { entry: br label %loop loop: ; preds = %bb54, %bb37 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %gep.1 = getelementptr inbounds i16, i16* %ptr.1, i64 %iv - %lv.1 = load i16, i16* %gep.1, align 2 + %gep.1 = getelementptr inbounds i16, ptr %ptr.1, i64 %iv + %lv.1 = load i16, ptr %gep.1, align 2 %ext.1 = sext i16 %lv.1 to i32 - %gep.2 = getelementptr inbounds i16, i16* %ptr.2, i64 %iv - %lv.2 = load i16, i16* %gep.2, align 2 + %gep.2 = getelementptr inbounds i16, ptr %ptr.2, i64 %iv + %lv.2 = load i16, ptr %gep.2, align 2 %ext.2 = sext i16 %lv.2 to i32 - %gep.off.1 = getelementptr inbounds i16, i16* %gep.2, i64 %off.1 - %lv.3 = load i16, i16* %gep.off.1, align 2 + %gep.off.1 = getelementptr inbounds i16, ptr %gep.2, i64 %off.1 + %lv.3 = load i16, ptr %gep.off.1, align 2 %ext.3 = sext i16 %lv.3 to i32 - %gep.off.2 = getelementptr inbounds i16, i16* %gep.2, i64 %off.2 - %lv.4 = load i16, i16* %gep.off.2, align 2 + %gep.off.2 = getelementptr inbounds i16, ptr %gep.2, i64 %off.2 + %lv.4 = load i16, ptr %gep.off.2, align 2 %ext.4 = sext i16 %lv.4 to i32 %tmp62 = mul nsw i32 %ext.2, 11 %tmp66 = mul nsw i32 %ext.3, -4 @@ -100,11 +100,11 @@ loop: ; preds = %bb54, %bb37 %tmp82 = sub nsw i32 %tmp81, %ext.1 %tmp83 = lshr i32 %tmp82, 1 %trunc.1 = trunc i32 %tmp75 to i16 - %gep.3 = getelementptr inbounds i16, i16* %ptr.3, i64 %iv - store i16 %trunc.1, i16* %gep.3, align 2 + %gep.3 = getelementptr inbounds i16, ptr %ptr.3, i64 %iv + store i16 %trunc.1, ptr %gep.3, align 2 %trunc.2 = trunc i32 %tmp83 to i16 - %gep.4 = getelementptr inbounds i16, i16* %ptr.4, i64 %iv - store i16 %trunc.2, i16* %gep.4, align 2 + %gep.4 = getelementptr inbounds i16, ptr %ptr.4, i64 %iv + store i16 %trunc.2, ptr %gep.4, align 2 %iv.next = add nuw nsw i64 %iv, 1 %cmp = icmp ult i64 %iv, 500 br i1 %cmp, label %loop, label %exit @@ -113,7 +113,7 @@ exit: ret void } -define void @test_tc_unknown(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2, i64 %N) { +define void @test_tc_unknown(ptr %ptr.1, ptr %ptr.2, ptr %ptr.3, ptr %ptr.4, i64 %off.1, i64 %off.2, i64 %N) { ; CHECK-LABEL: define void @test_tc_unknown ; DEFAULT: [[ADD:%.+]] = add i64 %N, 1 ; DEFAULT-NEXT: [[C:%.+]] = icmp ult i64 [[ADD]], 16 @@ -126,17 +126,17 @@ entry: loop: ; preds = %bb54, %bb37 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %gep.1 = getelementptr inbounds i16, i16* %ptr.1, i64 %iv - %lv.1 = load i16, i16* %gep.1, align 2 + %gep.1 = getelementptr inbounds i16, ptr %ptr.1, i64 %iv + %lv.1 = load i16, ptr %gep.1, align 2 %ext.1 = sext i16 %lv.1 to i32 - %gep.2 = getelementptr inbounds i16, i16* %ptr.2, i64 %iv - %lv.2 = load i16, i16* %gep.2, align 2 + %gep.2 = getelementptr inbounds i16, ptr %ptr.2, i64 %iv + %lv.2 = load i16, ptr %gep.2, align 2 %ext.2 = 
sext i16 %lv.2 to i32 - %gep.off.1 = getelementptr inbounds i16, i16* %gep.2, i64 %off.1 - %lv.3 = load i16, i16* %gep.off.1, align 2 + %gep.off.1 = getelementptr inbounds i16, ptr %gep.2, i64 %off.1 + %lv.3 = load i16, ptr %gep.off.1, align 2 %ext.3 = sext i16 %lv.3 to i32 - %gep.off.2 = getelementptr inbounds i16, i16* %gep.2, i64 %off.2 - %lv.4 = load i16, i16* %gep.off.2, align 2 + %gep.off.2 = getelementptr inbounds i16, ptr %gep.2, i64 %off.2 + %lv.4 = load i16, ptr %gep.off.2, align 2 %ext.4 = sext i16 %lv.4 to i32 %tmp62 = mul nsw i32 %ext.2, 11 %tmp66 = mul nsw i32 %ext.3, -4 @@ -155,11 +155,11 @@ loop: ; preds = %bb54, %bb37 %tmp82 = sub nsw i32 %tmp81, %ext.1 %tmp83 = lshr i32 %tmp82, 1 %trunc.1 = trunc i32 %tmp75 to i16 - %gep.3 = getelementptr inbounds i16, i16* %ptr.3, i64 %iv - store i16 %trunc.1, i16* %gep.3, align 2 + %gep.3 = getelementptr inbounds i16, ptr %ptr.3, i64 %iv + store i16 %trunc.1, ptr %gep.3, align 2 %trunc.2 = trunc i32 %tmp83 to i16 - %gep.4 = getelementptr inbounds i16, i16* %ptr.4, i64 %iv - store i16 %trunc.2, i16* %gep.4, align 2 + %gep.4 = getelementptr inbounds i16, ptr %ptr.4, i64 %iv + store i16 %trunc.2, ptr %gep.4, align 2 %iv.next = add nuw nsw i64 %iv, 1 %cmp = icmp ult i64 %iv, %N br i1 %cmp, label %loop, label %exit diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll index b001538..b66bb94 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll @@ -4,7 +4,7 @@ ; CHECK-REMARKS: UserVF ignored because of invalid costs. ; CHECK-REMARKS: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): alloca ; CHECK-REMARKS: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store -define void @alloca(i32** %vla, i64 %N) { +define void @alloca(ptr %vla, i64 %N) { ; CHECK-LABEL: @alloca( ; CHECK-NOT: %t | FileCheck %s ; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS -define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) { +define void @vec_load(i64 %N, ptr nocapture %a, ptr nocapture readonly %b) { ; CHECK-LABEL: @vec_load ; CHECK: vector.body: -; CHECK: %[[LOAD:.*]] = load , * +; CHECK: %[[LOAD:.*]] = load , ptr ; CHECK: call @foo_vec( %[[LOAD]]) entry: %cmp7 = icmp sgt i64 %N, 0 @@ -16,12 +16,12 @@ entry: for.body: ; preds = %for.body.preheader, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds double, double* %b, i64 %iv - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %b, i64 %iv + %0 = load double, ptr %arrayidx, align 8 %1 = call double @foo(double %0) #0 %add = fadd double %1, 1.000000e+00 - %arrayidx2 = getelementptr inbounds double, double* %a, i64 %iv - store double %add, double* %arrayidx2, align 8 + %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %iv + store double %add, ptr %arrayidx2, align 8 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1 @@ -30,7 +30,7 @@ for.end: ; preds = %for.body, %entry ret void } -define void @vec_scalar(i64 %N, double* nocapture %a) { +define void @vec_scalar(i64 %N, ptr nocapture %a) { ; CHECK-LABEL: @vec_scalar ; CHECK: vector.body: ; CHECK: call @foo_vec( shufflevector ( insertelement ( poison, double 1.000000e+01, i32 0), poison, zeroinitializer)) @@ 
-42,8 +42,8 @@ for.body: ; preds = %for.body.preheader, %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %0 = call double @foo(double 10.0) #0 %sub = fsub double %0, 1.000000e+00 - %arrayidx = getelementptr inbounds double, double* %a, i64 %iv - store double %sub, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv + store double %sub, ptr %arrayidx, align 8 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1 @@ -52,22 +52,22 @@ for.end: ; preds = %for.body, %entry ret void } -define void @vec_ptr(i64 %N, i64* noalias %a, i64** readnone %b) { +define void @vec_ptr(i64 %N, ptr noalias %a, ptr readnone %b) { ; CHECK-LABEL: @vec_ptr ; CHECK: vector.body: -; CHECK: %[[LOAD:.*]] = load , * -; CHECK: call @bar_vec( %[[LOAD]]) +; CHECK: %[[LOAD:.*]] = load , ptr +; CHECK: call @bar_vec( %[[LOAD]]) entry: %cmp7 = icmp sgt i64 %N, 0 br i1 %cmp7, label %for.body, label %for.end for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %gep = getelementptr i64*, i64** %b, i64 %iv - %load = load i64*, i64** %gep - %call = call i64 @bar(i64* %load) #1 - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv - store i64 %call, i64* %arrayidx + %gep = getelementptr ptr, ptr %b, i64 %iv + %load = load ptr, ptr %gep + %call = call i64 @bar(ptr %load) #1 + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv + store i64 %call, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, 1024 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 @@ -76,10 +76,10 @@ for.end: ret void } -define void @vec_intrinsic(i64 %N, double* nocapture readonly %a) { +define void @vec_intrinsic(i64 %N, ptr nocapture readonly %a) { ; CHECK-LABEL: @vec_intrinsic ; CHECK: vector.body: -; CHECK: %[[LOAD:.*]] = load , * +; CHECK: %[[LOAD:.*]] = load , ptr ; CHECK: call fast @sin_vec_nxv2f64( %[[LOAD]]) entry: %cmp7 = icmp sgt i64 %N, 0 @@ -87,11 +87,11 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds double, double* %a, i64 %iv - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv + %0 = load double, ptr %arrayidx, align 8 %1 = call fast double @llvm.sin.f64(double %0) #2 %add = fadd fast double %1, 1.000000e+00 - store double %add, double* %arrayidx, align 8 + store double %add, ptr %arrayidx, align 8 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %N br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 @@ -104,7 +104,7 @@ for.end: ; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load ; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32 ; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store -define void @vec_sin_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) { +define void @vec_sin_no_mapping(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %n) { ; CHECK: @vec_sin_no_mapping ; CHECK: call fast <2 x float> @llvm.sin.v2f32 ; CHECK-NOT: @llvm.sin.v2f32 ; CHECK-NOT: @llvm.sqrt.nxv2f32 entry: @@ -199,11 +199,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, 
float* %src, i64 %i.07 - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %src, i64 %i.07 + %0 = load float, ptr %arrayidx, align 4 %1 = tail call fast float @llvm.sqrt.f32(float %0) - %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07 - store float %1, float* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07 + store float %1, ptr %arrayidx1, align 4 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1 @@ -214,13 +214,13 @@ for.cond.cleanup: ; preds = %for.body declare double @foo(double) -declare i64 @bar(i64*) +declare i64 @bar(ptr) declare double @llvm.sin.f64(double) declare float @llvm.sin.f32(float) declare float @llvm.sqrt.f32(float) declare @foo_vec() -declare @bar_vec() +declare @bar_vec() declare @sin_vec_nxv2f64() declare <2 x double> @sin_vec_v2f64(<2 x double>) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll index 4a6b1fb..8123651 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll @@ -11,7 +11,7 @@ target triple = "aarch64-unknown-linux-gnu" ; a[i] /= b[i]; ; } -define void @predication_in_loop(i32* %a, i32* %b, i32* %cond) #0 { +define void @predication_in_loop(ptr %a, ptr %b, ptr %cond) #0 { ; CHECK-LABEL: @predication_in_loop ; CHECK: sdiv ; @@ -23,18 +23,18 @@ for.cond.cleanup: ; preds = %for.inc, %entry for.body: ; preds = %entry, %for.inc %i.09 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %cond, i64 %i.09 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %cond, i64 %i.09 + %0 = load i32, ptr %arrayidx, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body - %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %i.09 - %1 = load i32, i32* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.09 - %2 = load i32, i32* %arrayidx2, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %i.09 + %1 = load i32, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %i.09 + %2 = load i32, ptr %arrayidx2, align 4 %div = sdiv i32 %2, %1 - store i32 %div, i32* %arrayidx2, align 4 + store i32 %div, ptr %arrayidx2, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -56,7 +56,7 @@ for.inc: ; preds = %for.body, %if.then ; otherwise it could be able to vectorize, but will not because ; "Max legal vector width too small, scalable vectorization unfeasible.." 
-define void @unpredicated_loop_predication_through_tailfolding(i32* %a, i32* %b) #0 { +define void @unpredicated_loop_predication_through_tailfolding(ptr %a, ptr %b) #0 { ; CHECK-LABEL: @unpredicated_loop_predication_through_tailfolding ; CHECK-NOT: sdiv @@ -65,14 +65,14 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %sdiv = sdiv i32 %1, %0 %2 = add nuw nsw i64 %iv, 8 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %sdiv, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %sdiv, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll index bb11b2d..9989518 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll @@ -1,30 +1,30 @@ ; RUN: opt < %s -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ ; RUN: -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S | FileCheck %s -define void @invariant_store_red_exit_is_phi(i32* %dst, i32* readonly %src, i64 %n) { +define void @invariant_store_red_exit_is_phi(ptr %dst, ptr readonly %src, i64 %n) { ; CHECK-LABEL: @invariant_store_red_exit_is_phi( ; CHECK: vector.ph: ; CHECK: %[[ACTIVE_LANE_MASK_ENTRY:.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 %n) ; CHECK: vector.body: ; CHECK: %[[ACTIVE_LANE_MASK:.*]] = phi [ %[[ACTIVE_LANE_MASK_ENTRY]], %vector.ph ], [ %[[ACTIVE_LANE_MASK_NEXT:.*]], %vector.body ] ; CHECK: %[[VEC_PHI:.*]] = phi [ zeroinitializer, %vector.ph ], [ %[[PREDPHI:.*]], %vector.body ] -; CHECK: %[[LOAD:.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32 +; CHECK: %[[LOAD:.*]] = call @llvm.masked.load.nxv4i32.p0 ; CHECK-NEXT: %[[ADD:.*]] = add %[[VEC_PHI]], %[[LOAD]] ; CHECK-NEXT: %[[SELECT:.*]] = select %[[ACTIVE_LANE_MASK]], %[[ADD]], %[[VEC_PHI]] ; CHECK: %[[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 %{{.*}}, i64 %n) ; CHECK: middle.block: ; CHECK-NEXT: %[[SUM:.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( %[[SELECT]]) -; CHECK-NEXT: store i32 %[[SUM]], i32* %dst, align 4 +; CHECK-NEXT: store i32 %[[SUM]], ptr %dst, align 4 entry: br label %for.body for.body: ; preds = %entry, %for.inc %red = phi i32 [ 0, %entry ], [ %storemerge, %for.body ] %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx6 = getelementptr inbounds i32, i32* %src, i64 %indvars.iv - %load = load i32, i32* %arrayidx6, align 4 + %arrayidx6 = getelementptr inbounds i32, ptr %src, i64 %indvars.iv + %load = load i32, ptr %arrayidx6, align 4 %storemerge = add i32 %red, %load - store i32 %storemerge, i32* %dst, align 4 + store i32 %storemerge, ptr %dst, align 4 %indvars.iv.next = add nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.end.loopexit, label %for.body, !llvm.loop !0 diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll index 381efbd..fb8c752 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll @@ -7,7 +7,7 @@ ; ADD ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define i32 @add(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @add(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @add ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -23,8 +23,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi i32 [ 2, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %add = add nsw i32 %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -37,7 +37,7 @@ for.end: ; preds = %for.body, %entry ; OR ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define i32 @or(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @or ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -53,8 +53,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi i32 [ 2, %entry ], [ %or, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %or = or i32 %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -67,7 +67,7 @@ for.end: ; preds = %for.body, %entry ; AND ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define i32 @and(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @and(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @and ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -83,8 +83,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi i32 [ 2, %entry ], [ %and, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %and = and i32 %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -97,7 +97,7 @@ for.end: ; preds = %for.body, %entry ; XOR ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define i32 @xor(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @xor ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -113,8 +113,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi i32 [ 2, %entry ], [ %xor, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %xor = xor i32 %0, %sum.07 %iv.next = add 
nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -127,7 +127,7 @@ for.end: ; preds = %for.body, %entry ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) ; SMIN -define i32 @smin(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @smin ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -146,8 +146,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %cmp.i = icmp slt i32 %0, %sum.010 %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010 %iv.next = add nuw nsw i64 %iv, 1 @@ -161,7 +161,7 @@ for.end: ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) ; UMAX -define i32 @umax(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @umax ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -180,8 +180,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %cmp.i = icmp ugt i32 %0, %sum.010 %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010 %iv.next = add nuw nsw i64 %iv, 1 @@ -195,7 +195,7 @@ for.end: ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) ; FADD (FAST) -define float @fadd_fast(float* noalias nocapture readonly %a, i64 %n) { +define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-LABEL: @fadd_fast ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -211,8 +211,8 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + %0 = load float, ptr %arrayidx, align 4 %add = fadd fast float %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -224,7 +224,7 @@ for.end: ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. 
; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2) -define bfloat @fadd_fast_bfloat(bfloat* noalias nocapture readonly %a, i64 %n) { +define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-LABEL: @fadd_fast_bfloat ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load <8 x bfloat> @@ -240,8 +240,8 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds bfloat, bfloat* %a, i64 %iv - %0 = load bfloat, bfloat* %arrayidx, align 4 + %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv + %0 = load bfloat, ptr %arrayidx, align 4 %add = fadd fast bfloat %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -254,7 +254,7 @@ for.end: ; FMIN (FAST) ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define float @fmin_fast(float* noalias nocapture readonly %a, i64 %n) #0 { +define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-LABEL: @fmin_fast ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -273,8 +273,8 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + %0 = load float, ptr %arrayidx, align 4 %cmp.i = fcmp fast olt float %0, %sum.07 %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07 %iv.next = add nuw nsw i64 %iv, 1 @@ -288,7 +288,7 @@ for.end: ; FMAX (FAST) ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define float @fmax_fast(float* noalias nocapture readonly %a, i64 %n) #0 { +define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-LABEL: @fmax_fast ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -307,8 +307,8 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + %0 = load float, ptr %arrayidx, align 4 %cmp.i = fcmp fast ogt float %0, %sum.07 %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07 %iv.next = add nuw nsw i64 %iv, 1 @@ -322,7 +322,7 @@ for.end: ; ADD (with reduction stored in invariant address) ; CHECK-REMARK: vectorized loop (vectorization width: vscale x 4, interleaved count: 2) -define void @invariant_store(i32* %dst, i32* readonly %src) { +define void @invariant_store(ptr %dst, ptr readonly %src) { ; CHECK-LABEL: @invariant_store ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load @@ -332,18 +332,18 @@ define void @invariant_store(i32* %dst, i32* readonly %src) { ; CHECK: middle.block: ; CHECK: %[[ADD:.*]] = add %[[ADD2]], %[[ADD1]] ; CHECK-NEXT: %[[SUM:.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( %[[ADD]]) -; CHECK-NEXT: store i32 %[[SUM]], i32* %gep.dst, align 4 +; CHECK-NEXT: store i32 %[[SUM]], ptr %gep.dst, align 4 entry: - %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42 - store i32 0, i32* %gep.dst, align 4 + %gep.dst = getelementptr inbounds i32, ptr %dst, i64 42 + store i32 0, ptr %gep.dst, align 4 br label %for.body for.body: %sum = phi i32 [ 0, %entry ], [ 
%add, %for.body ] %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gep.src = getelementptr inbounds i32, i32* %src, i64 %indvars.iv - %0 = load i32, i32* %gep.src, align 4 + %gep.src = getelementptr inbounds i32, ptr %src, i64 %indvars.iv + %0 = load i32, ptr %gep.src, align 4 %add = add nsw i32 %sum, %0 - store i32 %add, i32* %gep.dst, align 4 + store i32 %add, ptr %gep.dst, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -358,7 +358,7 @@ for.cond.cleanup: ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. ; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2) -define i32 @mul(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) { +define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @mul ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load <4 x i32> @@ -374,8 +374,8 @@ entry: for.body: ; preds = %entry, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi i32 [ 2, %entry ], [ %mul, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 %mul = mul nsw i32 %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -388,7 +388,7 @@ for.end: ; preds = %for.body, %entry ; Note: This test was added to ensure we always check the legality of reductions (end emit a warning if necessary) before checking for memory dependencies ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. 
; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2) -define i32 @memory_dependence(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) { +define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @memory_dependence ; CHECK: vector.body: ; CHECK: %[[LOAD1:.*]] = load <4 x i32> @@ -408,14 +408,14 @@ entry: for.body: %i = phi i64 [ %inc, %for.body ], [ 0, %entry ] %sum = phi i32 [ %mul, %for.body ], [ 2, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %i - %1 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %i + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %i + %1 = load i32, ptr %arrayidx1, align 4 %add = add nsw i32 %1, %0 %add2 = add nuw nsw i64 %i, 32 - %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %add2 - store i32 %add, i32* %arrayidx3, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %add2 + store i32 %add, ptr %arrayidx3, align 4 %mul = mul nsw i32 %1, %sum %inc = add nuw nsw i64 %i, 1 %exitcond.not = icmp eq i64 %inc, %n diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll index 2a8f6ed3d..f28f77b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll @@ -30,20 +30,20 @@ ; VF-4: <4 x i32> ; VF-VSCALE4: <16 x i32> -define void @test0(i32* %a, i8* %b, i32* %c) #0 { +define void @test0(ptr %a, ptr %b, ptr %c) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv - %1 = load i8, i8* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv + %1 = load i8, ptr %arrayidx2, align 4 %zext = zext i8 %1 to i32 %add = add nsw i32 %zext, %0 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll index 3628ab0..e83eb72 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll @@ -6,7 +6,7 @@ ; Test that the MaxVF for the following loop, that has no dependence distances, ; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16 ; (maximized bandwidth for i8 in the loop). 
-define void @test0(i32* %a, i8* %b, i32* %c) #0 { +define void @test0(ptr %a, ptr %b, ptr %c) #0 { ; CHECK: LV: Checking a loop in 'test0' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16 @@ -19,14 +19,14 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv - %1 = load i8, i8* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv + %1 = load i8, ptr %arrayidx2, align 4 %zext = zext i8 %1 to i32 %add = add nsw i32 %zext, %0 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop @@ -37,7 +37,7 @@ exit: ; Test that the MaxVF for the following loop, with a dependence distance ; of 64 elements, is calculated as (maxvscale = 16) * 4. -define void @test1(i32* %a, i8* %b) #0 { +define void @test1(ptr %a, ptr %b) #0 { ; CHECK: LV: Checking a loop in 'test1' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16 @@ -50,15 +50,15 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv - %1 = load i8, i8* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv + %1 = load i8, ptr %arrayidx2, align 4 %zext = zext i8 %1 to i32 %add = add nsw i32 %zext, %0 %2 = add nuw nsw i64 %iv, 64 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop @@ -69,7 +69,7 @@ exit: ; Test that the MaxVF for the following loop, with a dependence distance ; of 32 elements, is calculated as (maxvscale = 16) * 2. 
-define void @test2(i32* %a, i8* %b) #0 { +define void @test2(ptr %a, ptr %b) #0 { ; CHECK: LV: Checking a loop in 'test2' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16 @@ -82,15 +82,15 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv - %1 = load i8, i8* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv + %1 = load i8, ptr %arrayidx2, align 4 %zext = zext i8 %1 to i32 %add = add nsw i32 %zext, %0 %2 = add nuw nsw i64 %iv, 32 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop @@ -101,7 +101,7 @@ exit: ; Test that the MaxVF for the following loop, with a dependence distance ; of 16 elements, is calculated as (maxvscale = 16) * 1. -define void @test3(i32* %a, i8* %b) #0 { +define void @test3(ptr %a, ptr %b) #0 { ; CHECK: LV: Checking a loop in 'test3' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1 ; CHECK_SCALABLE_ON: LV: Selecting VF: 16 @@ -114,15 +114,15 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv - %1 = load i8, i8* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv + %1 = load i8, ptr %arrayidx2, align 4 %zext = zext i8 %1 to i32 %add = add nsw i32 %zext, %0 %2 = add nuw nsw i64 %iv, 16 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop @@ -133,7 +133,7 @@ exit: ; Test the fallback mechanism when scalable vectors are not feasible due ; to e.g. dependence distance. 
-define void @test4(i32* %a, i32* %b) #0 { +define void @test4(ptr %a, ptr %b) #0 { ; CHECK: LV: Checking a loop in 'test4' ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF @@ -147,14 +147,14 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 8 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll index b8a5d59..47b159b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll @@ -45,20 +45,20 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" ; CHECK-DBG: LV: Selecting VF: 4. ; CHECK-LABEL: @test1 ; CHECK: <4 x i32> -define void @test1(i32* %a, i32* %b) #0 { +define void @test1(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 8 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0 @@ -90,20 +90,20 @@ exit: ; CHECK-DBG: LV: Selecting VF: 4. 
; CHECK-LABEL: @test2 ; CHECK: <4 x i32> -define void @test2(i32* %a, i32* %b) #0 { +define void @test2(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 4 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !3 @@ -138,20 +138,20 @@ exit: ; CHECK-DBG: LV: Using user VF vscale x 2. ; CHECK-LABEL: @test3 ; CHECK: -define void @test3(i32* %a, i32* %b) #0 { +define void @test3(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 32 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !6 @@ -190,20 +190,20 @@ exit: ; CHECK-DBG: LV: Selecting VF: vscale x 2. 
; CHECK-LABEL: @test4 ; CHECK: -define void @test4(i32* %a, i32* %b) #0 { +define void @test4(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 32 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !9 @@ -238,20 +238,20 @@ exit: ; CHECK-DBG: LV: Using user VF vscale x 4 ; CHECK-LABEL: @test5 ; CHECK: -define void @test5(i32* %a, i32* %b) #0 { +define void @test5(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 128 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !12 @@ -289,20 +289,20 @@ exit: ; CHECK-DBG: Selecting VF: vscale x 4. 
; CHECK-LABEL: @test6 ; CHECK: -define void @test6(i32* %a, i32* %b) #0 { +define void @test6(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 128 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !15 @@ -322,18 +322,18 @@ exit: ; CHECK-NO-SVE-LABEL: @test_no_sve ; CHECK-NO-SVE: <4 x i32> ; CHECK-NO-SVE-NOT: -define void @test_no_sve(i32* %a, i32* %b) #0 { +define void @test_no_sve(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 - store i32 %add, i32* %arrayidx, align 4 + store i32 %add, ptr %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !18 @@ -356,20 +356,20 @@ exit: ; CHECK-DBG: LV: Selecting VF: 4. ; CHECK-LABEL: @test_no_max_vscale ; CHECK: <4 x i32> -define void @test_no_max_vscale(i32* %a, i32* %b) #0 { +define void @test_no_max_vscale(ptr %a, ptr %b) #0 { entry: br label %loop loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 %add = add nsw i32 %1, %0 %2 = add nuw nsw i64 %iv, 4 - %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2 - store i32 %add, i32* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2 + store i32 %add, ptr %arrayidx5, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, 1024 br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !21 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll index c2581b9..bd3a01b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll @@ -16,17 +16,17 @@ target triple = "aarch64-unknown-linux-gnu" ; architecture with masked loads/stores, but we use SVE for testing purposes ; here. 
-define void @foo(i32* %data1, i32* %data2) { +define void @foo(ptr %data1, ptr %data2) { ; CHECK-LABEL: @foo( ; CHECK: vector.body: ; CHECK: br i1 {{%.*}}, label %pred.store.if, label %pred.store.continue ; CHECK: pred.store.if: -; CHECK-NEXT: store i32 {{%.*}}, i32* {{%.*}} +; CHECK-NEXT: store i32 {{%.*}}, ptr {{%.*}} ; CHECK-NEXT: br label %pred.store.continue ; CHECK: pred.store.continue: ; CHECK-NEXT: br i1 {{%.*}}, label %pred.store.if1, label %pred.store.continue2 ; CHECK: pred.store.if1: -; CHECK-NEXT: store i32 {{%.*}}, i32* {{%.*}} +; CHECK-NEXT: store i32 {{%.*}}, ptr {{%.*}} ; CHECK-NEXT: br label %pred.store.continue2 ; CHECK: pred.store.continue2: @@ -35,13 +35,13 @@ entry: while.body: %i = phi i64 [ 1023, %entry ], [ %i.next, %if.end ] - %arrayidx = getelementptr inbounds i32, i32* %data1, i64 %i - %ld = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %data1, i64 %i + %ld = load i32, ptr %arrayidx, align 4 %cmp = icmp sgt i32 %ld, %ld br i1 %cmp, label %if.then, label %if.end if.then: - store i32 %ld, i32* %arrayidx, align 4 + store i32 %ld, ptr %arrayidx, align 4 br label %if.end if.end: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll index a0ef4f4..38fedc1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll @@ -7,7 +7,7 @@ target triple = "aarch64--linux-gnu" @Foo = common global %struct.anon zeroinitializer, align 4 ; CHECK-LABEL: @foo( -; CHECK: load <4 x i32>, <4 x i32>* +; CHECK: load <4 x i32>, ptr ; CHECK: sdiv <4 x i32> ; CHECK: store <4 x i32> @@ -17,11 +17,11 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i64 0, i32 2, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 %div = sdiv i32 %0, 2 - %arrayidx2 = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv - store i32 %div, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds %struct.anon, ptr @Foo, i64 0, i32 0, i64 %indvars.iv + store i32 %div, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 100 br i1 %exitcond, label %for.end, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll index 604ac07..1cde8b9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios5.0.0" -define void @selects_1(i32* nocapture %dst, i32 %A, i32 %B, i32 %C, i32 %N) { +define void @selects_1(ptr nocapture %dst, i32 %A, i32 %B, i32 %C, i32 %N) { ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %cond = select i1 %cmp1, i32 10, i32 %and ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %cond6 = select i1 %cmp2, i32 30, i32 %and ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6 @@ -27,8 +27,8 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %indvars.iv 
= phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %dst, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 %and = and i32 %0, 2047 %cmp1 = icmp eq i32 %and, %A %cond = select i1 %cmp1, i32 10, i32 %and @@ -36,7 +36,7 @@ for.body: ; preds = %for.body.preheader, %cond6 = select i1 %cmp2, i32 30, i32 %and %cmp7 = icmp ugt i32 %cond, %C %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6 - store i32 %cond11, i32* %arrayidx, align 4 + store i32 %cond11, ptr %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll index 1052b7f..87347ef 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll @@ -7,23 +7,23 @@ target triple = "aarch64--linux-gnu" ; CHECK-LABEL: Checking a loop in 'interleaved_access' ; CHECK: The Smallest and Widest types: 64 / 64 bits ; -define void @interleaved_access(i8** %A, i64 %N) { +define void @interleaved_access(ptr %A, i64 %N) { for.ph: br label %for.body for.body: %i = phi i64 [ %i.next.3, %for.body ], [ 0, %for.ph ] - %tmp0 = getelementptr inbounds i8*, i8** %A, i64 %i - store i8* null, i8** %tmp0, align 8 + %tmp0 = getelementptr inbounds ptr, ptr %A, i64 %i + store ptr null, ptr %tmp0, align 8 %i.next.0 = add nuw nsw i64 %i, 1 - %tmp1 = getelementptr inbounds i8*, i8** %A, i64 %i.next.0 - store i8* null, i8** %tmp1, align 8 + %tmp1 = getelementptr inbounds ptr, ptr %A, i64 %i.next.0 + store ptr null, ptr %tmp1, align 8 %i.next.1 = add nsw i64 %i, 2 - %tmp2 = getelementptr inbounds i8*, i8** %A, i64 %i.next.1 - store i8* null, i8** %tmp2, align 8 + %tmp2 = getelementptr inbounds ptr, ptr %A, i64 %i.next.1 + store ptr null, ptr %tmp2, align 8 %i.next.2 = add nsw i64 %i, 3 - %tmp3 = getelementptr inbounds i8*, i8** %A, i64 %i.next.2 - store i8* null, i8** %tmp3, align 8 + %tmp3 = getelementptr inbounds ptr, ptr %A, i64 %i.next.2 + store ptr null, ptr %tmp3, align 8 %i.next.3 = add nsw i64 %i, 4 %cond = icmp slt i64 %i.next.3, %N br i1 %cond, label %for.body, label %for.end diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll index 16080e5..3127189 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll @@ -9,15 +9,15 @@ target triple="aarch64-unknown-linux-gnu" ; CHECK-VF4: Found an estimated cost of 21 for VF 4 For instruction: %add = fadd float %0, %sum.07 ; CHECK-VF8: Found an estimated cost of 42 for VF 8 For instruction: %add = fadd float %0, %sum.07 -define float @fadd_strict32(float* noalias nocapture readonly %a, i64 %n) { +define float @fadd_strict32(ptr noalias nocapture readonly %a, i64 %n) { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 
%iv + %0 = load float, ptr %arrayidx, align 4 %add = fadd float %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -31,15 +31,15 @@ for.end: ; CHECK-VF4: Found an estimated cost of 18 for VF 4 For instruction: %add = fadd double %0, %sum.07 ; CHECK-VF8: Found an estimated cost of 36 for VF 8 For instruction: %add = fadd double %0, %sum.07 -define double @fadd_strict64(double* noalias nocapture readonly %a, i64 %n) { +define double @fadd_strict64(ptr noalias nocapture readonly %a, i64 %n) { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds double, double* %a, i64 %iv - %0 = load double, double* %arrayidx, align 4 + %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv + %0 = load double, ptr %arrayidx, align 4 %add = fadd double %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -52,17 +52,17 @@ for.end: ; CHECK-VF4: Found an estimated cost of 23 for VF 4 For instruction: %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07) ; CHECK-VF8: Found an estimated cost of 46 for VF 8 For instruction: %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07) -define float @fmuladd_strict32(float* %a, float* %b, i64 %n) { +define float @fmuladd_strict32(ptr %a, ptr %b, i64 %n) { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %iv - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %b, i64 %iv - %1 = load float, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv + %1 = load float, ptr %arrayidx2, align 4 %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07) %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -77,17 +77,17 @@ declare float @llvm.fmuladd.f32(float, float, float) ; CHECK-VF4: Found an estimated cost of 22 for VF 4 For instruction: %muladd = tail call double @llvm.fmuladd.f64(double %0, double %1, double %sum.07) ; CHECK-VF8: Found an estimated cost of 44 for VF 8 For instruction: %muladd = tail call double @llvm.fmuladd.f64(double %0, double %1, double %sum.07) -define double @fmuladd_strict64(double* %a, double* %b, i64 %n) { +define double @fmuladd_strict64(ptr %a, ptr %b, i64 %n) { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi double [ 0.000000e+00, %entry ], [ %muladd, %for.body ] - %arrayidx = getelementptr inbounds double, double* %a, i64 %iv - %0 = load double, double* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds double, double* %b, i64 %iv - %1 = load double, double* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv + %0 = load double, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds double, ptr %b, i64 %iv + %1 = load double, ptr %arrayidx2, align 4 %muladd = tail call double @llvm.fmuladd.f64(double %0, double %1, double %sum.07) %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll index 679c9dc..a06c5a7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-vf1.ll @@ -6,7 +6,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-DEBUG: LV: Not interleaving scalar ordered reductions. -define void @foo(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %M, i64 %N) { +define void @foo(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %M, i64 %N) { ; CHECK-LABEL: @foo( ; CHECK-NOT: vector.body @@ -15,7 +15,7 @@ entry: for.body.us: ; preds = %entry, %for.cond3 %i.023.us = phi i64 [ %inc8.us, %for.cond3 ], [ 0, %entry ] - %arrayidx.us = getelementptr inbounds float, float* %dst, i64 %i.023.us + %arrayidx.us = getelementptr inbounds float, ptr %dst, i64 %i.023.us %mul.us = mul nsw i64 %i.023.us, %N br label %for.body3.us @@ -23,8 +23,8 @@ for.body3.us: ; preds = %for.body.us, %for.b %0 = phi float [ 0.000000e+00, %for.body.us ], [ %add6.us, %for.body3.us ] %j.021.us = phi i64 [ 0, %for.body.us ], [ %inc.us, %for.body3.us ] %add.us = add nsw i64 %j.021.us, %mul.us - %arrayidx4.us = getelementptr inbounds float, float* %src, i64 %add.us - %1 = load float, float* %arrayidx4.us, align 4 + %arrayidx4.us = getelementptr inbounds float, ptr %src, i64 %add.us + %1 = load float, ptr %arrayidx4.us, align 4 %add6.us = fadd float %1, %0 %inc.us = add nuw nsw i64 %j.021.us, 1 %exitcond.not = icmp eq i64 %inc.us, %N @@ -32,7 +32,7 @@ for.body3.us: ; preds = %for.body.us, %for.b for.cond3: ; preds = %for.body3.us %add6.us.lcssa = phi float [ %add6.us, %for.body3.us ] - store float %add6.us.lcssa, float* %arrayidx.us, align 4 + store float %add6.us.lcssa, ptr %arrayidx.us, align 4 %inc8.us = add nuw nsw i64 %i.023.us, 1 %exitcond26.not = icmp eq i64 %inc8.us, %M br i1 %exitcond26.not, label %exit, label %for.body.us diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll index a9b90f5..8ee0338 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll @@ -5,26 +5,26 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" -define void @cmpsel_i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) { +define void @cmpsel_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @cmpsel_i32( ; CHECK-NEXT: entry: ; CHECK: vector.body: -; CHECK: [[WIDE_LOAD:%.*]] = load , * {{.*}}, align 4 +; CHECK: [[WIDE_LOAD:%.*]] = load , ptr {{.*}}, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = select [[TMP1]], shufflevector ( insertelement ( poison, i32 2, i32 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, i32 10, i32 0), poison, zeroinitializer) -; CHECK: store [[TMP2]], * {{.*}}, align 4 +; CHECK: store [[TMP2]], ptr {{.*}}, align 4 ; entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 %tobool.not = icmp eq i32 %0, 0 %cond = select i1 %tobool.not, i32 2, i32 10 - %arrayidx2 = 
getelementptr inbounds i32, i32* %a, i64 %indvars.iv - store i32 %cond, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + store i32 %cond, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.end.loopexit, label %for.body, !llvm.loop !0 @@ -36,26 +36,26 @@ for.end: ; preds = %for.end.loopexit, % ret void } -define void @cmpsel_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) { +define void @cmpsel_f32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @cmpsel_f32( ; CHECK-NEXT: entry: ; CHECK: vector.body: -; CHECK: [[WIDE_LOAD:%.*]] = load , * {{.*}}, align 4 +; CHECK: [[WIDE_LOAD:%.*]] = load , ptr {{.*}}, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, float 3.000000e+00, i32 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP2:%.*]] = select [[TMP1]], shufflevector ( insertelement ( poison, float 1.000000e+01, i32 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, float 2.000000e+00, i32 0), poison, zeroinitializer) -; CHECK: store [[TMP2]], * {{.*}}, align 4 +; CHECK: store [[TMP2]], ptr {{.*}}, align 4 entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 %cmp1 = fcmp ogt float %0, 3.000000e+00 %conv = select i1 %cmp1, float 1.000000e+01, float 2.000000e+00 - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %indvars.iv - store float %conv, float* %arrayidx3, align 4 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %indvars.iv + store float %conv, ptr %arrayidx3, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -64,24 +64,24 @@ for.end: ; preds = %for.body, %entry ret void } -define void @fneg_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) { +define void @fneg_f32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @fneg_f32( ; CHECK-NEXT: entry: ; CHECK: vector.body: -; CHECK: [[WIDE_LOAD:%.*]] = load , * {{.*}}, align 4 +; CHECK: [[WIDE_LOAD:%.*]] = load , ptr {{.*}}, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = fneg [[WIDE_LOAD]] -; CHECK: store [[TMP1]], * {{.*}}, align 4 +; CHECK: store [[TMP1]], ptr {{.*}}, align 4 entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 %fneg = fneg float %0 - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %indvars.iv - store float %fneg, float* %arrayidx3, align 4 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %indvars.iv + store float %fneg, ptr %arrayidx3, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll index 6d1bb88..a5ebd00 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll @@ -2,7 +2,7 @@ ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -mattr=+sve \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S %s -o - | FileCheck %s -define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) #0 { +define void @cond_inv_load_i32i32i16(ptr noalias nocapture %a, ptr noalias nocapture readonly %cond, ptr noalias nocapture readonly %inv, i64 %n) #0 { ; CHECK-LABEL: @cond_inv_load_i32i32i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -14,20 +14,18 @@ define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias noc ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16* [[INV:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0i16( [[BROADCAST_SPLAT]], i32 2, [[TMP6]], poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i16.nxv4p0( [[BROADCAST_SPLAT]], i32 2, [[TMP6]], poison) ; CHECK-NEXT: [[TMP7:%.*]] = sext [[WIDE_MASKED_GATHER]] to -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[TMP7]], * [[TMP9]], i32 4, [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP7]], ptr [[TMP8]], i32 4, [[TMP6]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] @@ -41,15 +39,15 @@ define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias noc ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[COND]], i64 [[I_07]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[I_07]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP13]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP14:%.*]] = load i16, i16* [[INV]], align 2 +; CHECK-NEXT: [[TMP14:%.*]] = load i16, ptr [[INV]], align 2 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_07]] -; CHECK-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_07]] +; CHECK-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1 @@ -63,16 +61,16 @@ entry: for.body: ; preds = %entry, %for.inc %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %cond, i64 %i.07 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %cond, i64 %i.07 + %0 = load i32, ptr %arrayidx, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body - %1 = load i16, i16* %inv, align 2 + %1 = load i16, ptr %inv, align 2 %conv = sext i16 %1 to i32 - %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %i.07 - store i32 %conv, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %i.07 + store i32 %conv, ptr %arrayidx1, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -84,7 +82,7 @@ exit: ; preds = %for.inc ret void } -define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) #0 { +define void @cond_inv_load_f64f64f64(ptr noalias nocapture %a, ptr noalias nocapture readonly %cond, ptr noalias nocapture readonly %inv, i64 %n) #0 { ; CHECK-LABEL: @cond_inv_load_f64f64f64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -96,19 +94,17 @@ define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noali ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, double* [[INV:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, double* [[COND:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP4]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 4.000000e-01, i32 0), poison, zeroinitializer) -; CHECK-NEXT: 
[[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f64.nxv4p0f64( [[BROADCAST_SPLAT]], i32 8, [[TMP6]], poison) -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4f64.p0nxv4f64( [[WIDE_MASKED_GATHER]], * [[TMP8]], i32 8, [[TMP6]]) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f64.nxv4p0( [[BROADCAST_SPLAT]], i32 8, [[TMP6]], poison) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4f64.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP7]], i32 8, [[TMP6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] @@ -122,14 +118,14 @@ define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noali ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[COND]], i64 [[I_08]] -; CHECK-NEXT: [[TMP12:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[COND]], i64 [[I_08]] +; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[TMP12]], 4.000000e-01 ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP13:%.*]] = load double, double* [[INV]], align 8 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[I_08]] -; CHECK-NEXT: store double [[TMP13]], double* [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[INV]], align 8 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]] +; CHECK-NEXT: store double [[TMP13]], ptr [[ARRAYIDX2]], align 8 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 @@ -143,15 +139,15 @@ entry: for.body: ; preds = %entry, %for.inc %i.08 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds double, double* %cond, i64 %i.08 - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %cond, i64 %i.08 + %0 = load double, ptr %arrayidx, align 8 %cmp1 = fcmp ogt double %0, 4.000000e-01 br i1 %cmp1, label %if.then, label %for.inc if.then: ; preds = %for.body - %1 = load double, double* %inv, align 8 - %arrayidx2 = getelementptr inbounds double, double* %a, i64 %i.08 - store double %1, double* %arrayidx2, align 8 + %1 = load double, ptr %inv, align 8 + %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %i.08 + store double %1, ptr %arrayidx2, align 8 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -163,7 +159,7 @@ exit: ; preds = %for.inc ret void } -define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) #0 { +define void @invariant_load_cond(ptr noalias nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %cond, i64 %n) #0 { ; CHECK-LABEL: @invariant_load_cond( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -178,21 +174,18 @@ define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture reado ; 
CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 42 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32* [[TMP4]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP6]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 42 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, ptr [[TMP4]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[COND:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to * -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP9]], i32 4, [[TMP7]], poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0i32( [[DOTSPLAT]], i32 4, [[TMP7]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, [[TMP7]], poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[DOTSPLAT]], i32 4, [[TMP7]], poison) ; CHECK-NEXT: [[TMP10:%.*]] = add nsw [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[TMP10]], * [[TMP12]], i32 4, [[TMP7]]) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP10]], ptr [[TMP11]], i32 4, [[TMP7]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]] @@ -206,18 +199,18 @@ define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture reado ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[COND]], i64 [[IV]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP16]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 42 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[IV]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: 
[[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 42 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]] -; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX4]], align 4 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 @@ -231,19 +224,19 @@ entry: for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] - %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 42 - %arrayidx2 = getelementptr inbounds i32, i32* %cond, i64 %iv - %0 = load i32, i32* %arrayidx2, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 42 + %arrayidx2 = getelementptr inbounds i32, ptr %cond, i64 %iv + %0 = load i32, ptr %arrayidx2, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: - %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx3, align 4 - %2 = load i32, i32* %arrayidx1, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx3, align 4 + %2 = load i32, ptr %arrayidx1, align 4 %add = add nsw i32 %2, %1 - %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %iv - store i32 %add, i32* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 br label %for.inc for.inc: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll index 64a0d7d..3fc1a15 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll @@ -5,19 +5,19 @@ target triple="aarch64--linux-gnu" ; CHECK: LV: Checking a loop in 'gather_nxv4i32_loaded_index' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %1 = load float, float* %arrayidx3, align 4 -define void @gather_nxv4i32_loaded_index(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx3, align 4 +define void @gather_nxv4i32_loaded_index(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %b, i64 %indvars.iv - %0 = load i64, i64* %arrayidx, align 8 - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %0 - %1 = load float, float* %arrayidx3, align 4 - %arrayidx5 = getelementptr inbounds float, float* %c, i64 %indvars.iv - store float %1, float* %arrayidx5, align 4 + %arrayidx = getelementptr inbounds i64, ptr %b, i64 %indvars.iv + %0 = load i64, ptr %arrayidx, align 8 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %0 + %1 = load float, ptr %arrayidx3, align 4 + %arrayidx5 = getelementptr inbounds float, ptr %c, i64 %indvars.iv + store 
float %1, ptr %arrayidx5, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -27,19 +27,19 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo } ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_loaded_index' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %1, float* %arrayidx5, align 4 -define void @scatter_nxv4i32_loaded_index(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %1, ptr %arrayidx5, align 4 +define void @scatter_nxv4i32_loaded_index(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %b, i64 %indvars.iv - %0 = load i64, i64* %arrayidx, align 8 - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %indvars.iv - %1 = load float, float* %arrayidx3, align 4 - %arrayidx5 = getelementptr inbounds float, float* %c, i64 %0 - store float %1, float* %arrayidx5, align 4 + %arrayidx = getelementptr inbounds i64, ptr %b, i64 %indvars.iv + %0 = load i64, ptr %arrayidx, align 8 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %indvars.iv + %1 = load float, ptr %arrayidx3, align 4 + %arrayidx5 = getelementptr inbounds float, ptr %c, i64 %0 + store float %1, ptr %arrayidx5, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -51,18 +51,18 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo ; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks ; to ensure the stride value is always 1. Therefore, it can assume a contiguous load and a cost of 1. 
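; NOTE: The standalone snippet below is only an illustration and is not one of
; the tests in this file; the function and value names are hypothetical. It
; sketches the contiguous form the vectoriser can assume once the versioned
; loop has passed the stride==1 SCEV check described above: a single
; scalable-vector load and store (cost 1 each) instead of a gather/scatter,
; written with opaque pointers.
define void @stride1_copy_sketch(ptr noalias %dst, ptr noalias %src, i64 %index) {
entry:
  %src.gep = getelementptr inbounds float, ptr %src, i64 %index
  ; contiguous scalable load: one wide load rather than a masked gather
  %vec = load <vscale x 4 x float>, ptr %src.gep, align 4
  %dst.gep = getelementptr inbounds float, ptr %dst, i64 %index
  ; contiguous scalable store: one wide store rather than a masked scatter
  store <vscale x 4 x float> %vec, ptr %dst.gep, align 4
  ret void
}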
; CHECK: LV: Checking a loop in 'gather_nxv4i32_unknown_stride' -; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %0 = load float, float* %arrayidx, align 4 -define void @gather_nxv4i32_unknown_stride(float* noalias nocapture readonly %a, float* noalias nocapture %b, i64 %stride, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4 +define void @gather_nxv4i32_unknown_stride(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %stride, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, %stride - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2 - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -74,18 +74,18 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo ; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks ; to ensure the stride value is always 1. Therefore, it can assume a contiguous load and cost is 1. ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_unknown_stride' -; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: store float %0, float* %arrayidx2, align 4 -define void @scatter_nxv4i32_unknown_stride(float* noalias nocapture readonly %a, float* noalias nocapture %b, i64 %stride, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4 +define void @scatter_nxv4i32_unknown_stride(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %stride, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, %stride - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv.stride2 - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv.stride2 + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -95,18 +95,18 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo } ; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride2' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, float* %arrayidx, align 4 -define void @gather_nxv4i32_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, 
align 4 +define void @gather_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, 2 - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2 - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -116,18 +116,18 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo } ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride2' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, float* %arrayidx2, align 4 -define void @scatter_nxv4i32_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4 +define void @scatter_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, 2 - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv.stride2 - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv.stride2 + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -138,18 +138,18 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo ; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride64' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, float* %arrayidx, align 4 -define void @gather_nxv4i32_stride64(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4 +define void @gather_nxv4i32_stride64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, 64 - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2 - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2 + %0 = load float, ptr %arrayidx, align 4 + 
%arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -159,18 +159,18 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo } ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride64' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, float* %arrayidx2, align 4 -define void @scatter_nxv4i32_stride64(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { +; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4 +define void @scatter_nxv4i32_stride64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, 64 - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv.stride2 - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %indvars.iv.stride2 + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll index 14db7f2..7a630fe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll @@ -2,7 +2,7 @@ ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -mattr=+sve \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S %s -force-target-instruction-cost=1 -o - | FileCheck %s -define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 { +define void @gather_nxv4i32_ind64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 { ; CHECK-LABEL: @gather_nxv4i32_ind64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -17,14 +17,12 @@ define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noa ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[TMP4]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], [[WIDE_LOAD]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0f32( [[TMP6]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: 
[[TMP8:%.*]] = bitcast float* [[TMP7]] to * -; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], * [[TMP8]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[WIDE_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP6]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] @@ -38,12 +36,12 @@ define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noa ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX3]], align 4 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[TMP13]], float* [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[TMP13]], ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -55,12 +53,12 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %b, i64 %indvars.iv - %0 = load i64, i64* %arrayidx, align 8 - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %0 - %1 = load float, float* %arrayidx3, align 4 - %arrayidx5 = getelementptr inbounds float, float* %c, i64 %indvars.iv - store float %1, float* %arrayidx5, align 4 + %arrayidx = getelementptr inbounds i64, ptr %b, i64 %indvars.iv + %0 = load i64, ptr %arrayidx, align 8 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %0 + %1 = load float, ptr %arrayidx3, align 4 + %arrayidx5 = getelementptr inbounds float, ptr %c, i64 %indvars.iv + store float %1, ptr %arrayidx5, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -72,7 +70,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo ; NOTE: I deliberately chose '%b' as an array of i32 indices, since the ; additional 'sext' in the for.body loop exposes 
additional code paths ; during vectorisation. -define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) #0 { +define void @scatter_nxv4i32_ind32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i64 %n) #0 { ; CHECK-LABEL: @scatter_nxv4i32_ind32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -87,15 +85,13 @@ define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias noc ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to * -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , * [[TMP7]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = sext [[WIDE_LOAD1]] to -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A:%.*]], [[TMP8]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32( [[WIDE_LOAD]], [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], [[TMP8]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4f32.nxv4p0( [[WIDE_LOAD]], [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer)) ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] @@ -109,13 +105,13 @@ define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias noc ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 ; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IDXPROM4]] -; CHECK-NEXT: store float [[TMP13]], float* [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IDXPROM4]] +; CHECK-NEXT: store float [[TMP13]], ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: 
[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -127,13 +123,13 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, float* %c, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv - %1 = load i32, i32* %arrayidx3, align 4 + %arrayidx = getelementptr inbounds float, ptr %c, i64 %indvars.iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv + %1 = load i32, ptr %arrayidx3, align 4 %idxprom4 = sext i32 %1 to i64 - %arrayidx5 = getelementptr inbounds float, float* %a, i64 %idxprom4 - store float %0, float* %arrayidx5, align 4 + %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %idxprom4 + store float %0, ptr %arrayidx5, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 @@ -142,7 +138,7 @@ for.cond.cleanup: ; preds = %for.body, %entry ret void } -define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) #0 { +define void @scatter_inv_nxv4i32(ptr noalias nocapture %inv, ptr noalias nocapture readonly %b, i64 %n) #0 { ; CHECK-LABEL: @scatter_inv_nxv4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -154,16 +150,15 @@ define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocap ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32* [[INV:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( shufflevector ( insertelement ( poison, i32 3, i32 0), poison, zeroinitializer), [[BROADCAST_SPLAT]], i32 4, [[TMP6]]) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( shufflevector ( insertelement ( poison, i32 3, i32 0), poison, zeroinitializer), [[BROADCAST_SPLAT]], i32 4, [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] @@ -177,12 +172,12 @@ define void @scatter_inv_nxv4i32(i32* noalias 
nocapture %inv, i32* noalias nocap ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP10]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: store i32 3, i32* [[INV]], align 4 +; CHECK-NEXT: store i32 3, ptr [[INV]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -196,13 +191,13 @@ entry: for.body: ; preds = %entry, %for.inc %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 %tobool.not = icmp eq i32 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body - store i32 3, i32* %inv, align 4 + store i32 3, ptr %inv, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -214,7 +209,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo ret void } -define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) #0 { +define void @gather_inv_nxv4i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %inv, i64 %n) #0 { ; CHECK-LABEL: @gather_inv_nxv4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -226,18 +221,16 @@ define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocaptur ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32* [[INV:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[INV:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP5]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 3, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0i32( [[BROADCAST_SPLAT]], i32 4, [[TMP6]], poison) -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP4]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[WIDE_MASKED_GATHER]], * [[TMP7]], i32 4, [[TMP6]]) +; CHECK-NEXT: 
[[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP6]], poison) +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_GATHER]], ptr [[TMP4]], i32 4, [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] @@ -251,13 +244,13 @@ define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocaptur ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[TMP11]], 3 ; CHECK-NEXT: br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[INV]], align 4 -; CHECK-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[INV]], align 4 +; CHECK-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 @@ -271,14 +264,14 @@ entry: for.body: ; preds = %entry, %for.inc %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 %cmp2 = icmp sgt i32 %0, 3 br i1 %cmp2, label %if.then, label %for.inc if.then: ; preds = %for.body - %1 = load i32, i32* %inv, align 4 - store i32 %1, i32* %arrayidx, align 4 + %1 = load i32, ptr %inv, align 4 + store i32 %1, ptr %arrayidx, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -292,7 +285,7 @@ for.cond.cleanup: ; preds = %for.inc, %entry -define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) #0 { +define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 { ; CHECK-LABEL: @gather_nxv4i32_ind64_stride2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -316,19 +309,17 @@ define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture %a, float* no ; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = shl [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP8:%.*]] = shl [[STEP_ADD]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[B:%.*]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[B]], [[TMP8]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0f32( [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0f32( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, 
i32 0), poison, zeroinitializer), poison) -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast float* [[TMP11]] to * -; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], * [[TMP12]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], [[TMP8]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP9]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( [[TMP10]], i32 4, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP14:%.*]] = shl nuw nsw i32 [[TMP13]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to * -; CHECK-NEXT: store [[WIDE_MASKED_GATHER2]], * [[TMP17]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP15]] +; CHECK-NEXT: store [[WIDE_MASKED_GATHER2]], ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 3 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] @@ -344,10 +335,10 @@ define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture %a, float* no ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INDVARS_IV_STRIDE2:%.*]] = shl i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV_STRIDE2]] -; CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[TMP21]], float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV_STRIDE2]] +; CHECK-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[TMP21]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -360,10 +351,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %indvars.iv.stride2 = mul i64 %indvars.iv, 2 - %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv.stride2 - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv - store float %0, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv.stride2 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %a, i64 
%indvars.iv + store float %0, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond.not, label %for.cond.cleanup, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll index 84b1391..739010b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll @@ -11,7 +11,7 @@ target triple = "aarch64-linux-gnu" ; a[i] = b[i]; ; } -define void @cond_ind64(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) #0 { +define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 { ; CHECK-LABEL: @cond_ind64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() @@ -33,12 +33,10 @@ define void @cond_ind64(i32* noalias nocapture %a, i32* noalias nocapture readon ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP4]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = trunc [[VEC_IND]] to -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to * -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* [[TMP9]], i32 4, [[TMP7]], poison) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( [[WIDE_MASKED_LOAD]], * [[TMP11]], i32 4, [[TMP7]]) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, [[TMP7]], poison) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr [[TMP10]], i32 4, [[TMP7]]) ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]] @@ -57,10 +55,10 @@ define void @cond_ind64(i32* noalias nocapture %a, i32* noalias nocapture readon ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[AND]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_08]] -; CHECK-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_08]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_08]] +; CHECK-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 @@ -79,10 +77,10 @@ for.body: ; preds = %entry, %for.inc br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body - %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.08 - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr 
inbounds i32, i32* %a, i64 %i.08 - store i32 %0, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.08 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %i.08 + store i32 %0, ptr %arrayidx1, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll index 90a7b10..05a6ebe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll @@ -1,28 +1,28 @@ ; RUN: opt -S -passes=loop-vectorize -mattr=+sve -mtriple aarch64-linux-gnu \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s -define void @invariant_load(i64 %n, i32* noalias nocapture %a, i32* nocapture readonly %b) { +define void @invariant_load(i64 %n, ptr noalias nocapture %a, ptr nocapture readonly %b) { ; CHECK-LABEL: @invariant_load ; CHECK: vector.body: -; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42 -; CHECK-NEXT: %[[INVLOAD:.*]] = load i32, i32* %[[GEP]] +; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, ptr %b, i64 42 +; CHECK-NEXT: %[[INVLOAD:.*]] = load i32, ptr %[[GEP]] ; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement poison, i32 %[[INVLOAD]], i32 0 ; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector %[[SPLATINS]], poison, zeroinitializer -; CHECK: %[[LOAD:.*]] = load , * +; CHECK: %[[LOAD:.*]] = load , ptr ; CHECK-NEXT: %[[ADD:.*]] = add nsw %[[SPLAT]], %[[LOAD]] -; CHECK: store %[[ADD]], * +; CHECK: store %[[ADD]], ptr entry: br label %for.body for.body: ; preds = %for.body.lr.ph, %for.body %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %b, i64 42 - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %iv - %1 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 42 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %iv + %1 = load i32, ptr %arrayidx1, align 4 %add = add nsw i32 %0, %1 - %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %iv - store i32 %add, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx2, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll index 51fc8f9..31abba4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll @@ -1,25 +1,25 @@ ; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -passes=loop-vectorize,dce,instcombine -S \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue <%s | FileCheck %s -define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) #0 { +define void @stride7_i32(ptr noalias nocapture %dst, i64 %n) #0 { ; CHECK-LABEL: @stride7_i32( ; CHECK: vector.body ; CHECK: %[[VEC_IND:.*]] = phi [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ] ; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw %[[VEC_IND]], shufflevector ( insertelement ( poison, i64 7, i32 0), poison, zeroinitializer) -; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds i32, i32* %dst, %[[PTR_INDICES]] -; CHECK-NEXT: %[[GLOAD:.*]] 
= call @llvm.masked.gather.nxv4i32.nxv4p0i32( %[[PTRS]] +; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds i32, ptr %dst, %[[PTR_INDICES]] +; CHECK-NEXT: %[[GLOAD:.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( %[[PTRS]] ; CHECK-NEXT: %[[VALS:.*]] = add nsw %[[GLOAD]], -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32( %[[VALS]], %[[PTRS]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( %[[VALS]], %[[PTRS]] entry: br label %for.body for.body: ; preds = %entry, %for.body %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ] %mul = mul nuw nsw i64 %i.05, 7 - %arrayidx = getelementptr inbounds i32, i32* %dst, i64 %mul - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %dst, i64 %mul + %0 = load i32, ptr %arrayidx, align 4 %add = add nsw i32 %0, 3 - store i32 %add, i32* %arrayidx, align 4 + store i32 %add, ptr %arrayidx, align 4 %inc = add nuw nsw i64 %i.05, 1 %exitcond.not = icmp eq i64 %inc, %n br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -28,25 +28,25 @@ for.end: ; preds = %for.end.loopexit, % ret void } -define void @stride7_f64(double* noalias nocapture %dst, i64 %n) #0 { +define void @stride7_f64(ptr noalias nocapture %dst, i64 %n) #0 { ; CHECK-LABEL: @stride7_f64( ; CHECK: vector.body ; CHECK: %[[VEC_IND:.*]] = phi [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ] ; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw %[[VEC_IND]], shufflevector ( insertelement ( poison, i64 7, i32 0), poison, zeroinitializer) -; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, %[[PTR_INDICES]] -; CHECK-NEXT: %[[GLOAD:.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0f64( %[[PTRS]], +; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds double, ptr %dst, %[[PTR_INDICES]] +; CHECK-NEXT: %[[GLOAD:.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( %[[PTRS]], ; CHECK-NEXT: %[[VALS:.*]] = fadd %[[GLOAD]], -; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64( %[[VALS]], %[[PTRS]], +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( %[[VALS]], %[[PTRS]], entry: br label %for.body for.body: ; preds = %entry, %for.body %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ] %mul = mul nuw nsw i64 %i.05, 7 - %arrayidx = getelementptr inbounds double, double* %dst, i64 %mul - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %dst, i64 %mul + %0 = load double, ptr %arrayidx, align 8 %add = fadd double %0, 1.000000e+00 - store double %add, double* %arrayidx, align 8 + store double %add, ptr %arrayidx, align 8 %inc = add nuw nsw i64 %i.05, 1 %exitcond.not = icmp eq i64 %inc, %n br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6 @@ -56,30 +56,30 @@ for.end: ; preds = %for.end.loopexit, % } -define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) #0 { +define void @cond_stride7_f64(ptr noalias nocapture %dst, ptr noalias nocapture readonly %cond, i64 %n) #0 { ; CHECK-LABEL: @cond_stride7_f64( ; CHECK: vector.body ; CHECK: %[[MASK:.*]] = icmp ne -; CHECK: %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, %{{.*}} -; CHECK-NEXT: %[[GLOAD:.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0f64( %[[PTRS]], i32 8, %[[MASK]] +; CHECK: %[[PTRS:.*]] = getelementptr inbounds double, ptr %dst, %{{.*}} +; CHECK-NEXT: %[[GLOAD:.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( %[[PTRS]], i32 8, %[[MASK]] ; CHECK-NEXT: %[[VALS:.*]] = fadd %[[GLOAD]], -; CHECK-NEXT: call void 
@llvm.masked.scatter.nxv2f64.nxv2p0f64( %[[VALS]], %[[PTRS]], i32 8, %[[MASK]]) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( %[[VALS]], %[[PTRS]], i32 8, %[[MASK]]) entry: br label %for.body for.body: ; preds = %entry, %for.inc %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %cond, i64 %i.07 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %cond, i64 %i.07 + %0 = load i64, ptr %arrayidx, align 8 %tobool.not = icmp eq i64 %0, 0 br i1 %tobool.not, label %for.inc, label %if.then if.then: ; preds = %for.body %mul = mul nsw i64 %i.07, 7 - %arrayidx1 = getelementptr inbounds double, double* %dst, i64 %mul - %1 = load double, double* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds double, ptr %dst, i64 %mul + %1 = load double, ptr %arrayidx1, align 8 %add = fadd double %1, 1.000000e+00 - store double %add, double* %arrayidx1, align 8 + store double %add, ptr %arrayidx1, align 8 br label %for.inc for.inc: ; preds = %for.body, %if.then diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll index eac9701..5a87b3b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll @@ -2,14 +2,14 @@ target triple = "aarch64-unknown-linux-gnu" -define void @trip7_i64(i64* noalias nocapture noundef %dst, i64* noalias nocapture noundef readonly %src) #0 { +define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 { ; CHECK-LABEL: @trip7_i64( ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK: [[ACTIVE_LANE_MASK:%.*]] = phi [ {{%.*}}, %vector.ph ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %vector.body ] -; CHECK: {{%.*}} = call @llvm.masked.load.nxv2i64.p0nxv2i64(* {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK: {{%.*}} = call @llvm.masked.load.nxv2i64.p0nxv2i64(* {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK: call void @llvm.masked.store.nxv2i64.p0nxv2i64( {{%.*}}, * {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK: {{%.*}} = call @llvm.masked.load.nxv2i64.p0(ptr {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK: {{%.*}} = call @llvm.masked.load.nxv2i64.p0(ptr {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK: call void @llvm.masked.store.nxv2i64.p0( {{%.*}}, ptr {{%.*}}, i32 8, [[ACTIVE_LANE_MASK]]) ; CHECK: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[VF:%.*]] = mul i64 [[VSCALE]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[VF]] @@ -23,13 +23,13 @@ entry: for.body: ; preds = %entry, %for.body %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %arrayidx = getelementptr inbounds i64, i64* %src, i64 %i.06 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.06 + %0 = load i64, ptr %arrayidx, align 8 %mul = shl nsw i64 %0, 1 - %arrayidx1 = getelementptr inbounds i64, i64* %dst, i64 %i.06 - %1 = load i64, i64* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds i64, ptr %dst, i64 %i.06 + %1 = load i64, ptr %arrayidx1, align 8 %add = add nsw i64 %1, %mul - store i64 %add, i64* %arrayidx1, align 8 + store i64 %add, ptr %arrayidx1, align 8 %inc = add nuw nsw i64 %i.06, 1 %exitcond.not = icmp eq i64 %inc, 7 br i1 %exitcond.not, label %for.end, label %for.body @@ -38,19 +38,19 @@ for.end: ; preds = %for.body ret 
void } -define void @trip5_i8(i8* noalias nocapture noundef %dst, i8* noalias nocapture noundef readonly %src) #0 { +define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 { ; CHECK-LABEL: @trip5_i8( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[I_08]] -; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[I_08]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP0]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DST:%.*]], i64 [[I_08]] -; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[I_08]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP1]] -; CHECK-NEXT: store i8 [[ADD]], i8* [[ARRAYIDX1]], align 1 +; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] @@ -62,13 +62,13 @@ entry: for.body: ; preds = %entry, %for.body %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ] - %arrayidx = getelementptr inbounds i8, i8* %src, i64 %i.08 - %0 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.08 + %0 = load i8, ptr %arrayidx, align 1 %mul = shl i8 %0, 1 - %arrayidx1 = getelementptr inbounds i8, i8* %dst, i64 %i.08 - %1 = load i8, i8* %arrayidx1, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.08 + %1 = load i8, ptr %arrayidx1, align 1 %add = add i8 %mul, %1 - store i8 %add, i8* %arrayidx1, align 1 + store i8 %add, ptr %arrayidx1, align 1 %inc = add nuw nsw i64 %i.08, 1 %exitcond.not = icmp eq i64 %inc, 5 br i1 %exitcond.not, label %for.end, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll index bd1e289..820fd88 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll @@ -1,32 +1,30 @@ ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -mattr=+sve \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S %s -o - | FileCheck %s -define void @mloadstore_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) { +define void @mloadstore_f32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @mloadstore_f32 ; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load , * +; CHECK: %[[LOAD1:.*]] = load , ptr ; CHECK-NEXT: %[[MASK:.*]] = fcmp ogt %[[LOAD1]], -; CHECK-NEXT: %[[GEPA:.*]] = getelementptr float, float* %a, -; CHECK-NEXT: %[[MLOAD_PTRS:.*]] = bitcast float* %[[GEPA]] to * -; CHECK-NEXT: %[[LOAD2:.*]] = call @llvm.masked.load.nxv4f32.p0nxv4f32(* %[[MLOAD_PTRS]], i32 4, %[[MASK]] +; CHECK-NEXT: %[[GEPA:.*]] = getelementptr float, ptr %a, +; CHECK-NEXT: %[[LOAD2:.*]] = call @llvm.masked.load.nxv4f32.p0(ptr %[[GEPA]], i32 4, %[[MASK]] ; CHECK-NEXT: %[[FADD:.*]] = fadd 
%[[LOAD1]], %[[LOAD2]] -; CHECK-NEXT: %[[MSTORE_PTRS:.*]] = bitcast float* %[[GEPA]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0nxv4f32( %[[FADD]], * %[[MSTORE_PTRS]], i32 4, %[[MASK]]) +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0( %[[FADD]], ptr %[[GEPA]], i32 4, %[[MASK]]) entry: br label %for.body for.body: ; preds = %entry, %for.inc %i.011 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, float* %b, i64 %i.011 - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %b, i64 %i.011 + %0 = load float, ptr %arrayidx, align 4 %cmp1 = fcmp ogt float %0, 0.000000e+00 br i1 %cmp1, label %if.then, label %for.inc if.then: ; preds = %for.body - %arrayidx3 = getelementptr inbounds float, float* %a, i64 %i.011 - %1 = load float, float* %arrayidx3, align 4 + %arrayidx3 = getelementptr inbounds float, ptr %a, i64 %i.011 + %1 = load float, ptr %arrayidx3, align 4 %add = fadd float %0, %1 - store float %add, float* %arrayidx3, align 4 + store float %add, ptr %arrayidx3, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then @@ -38,32 +36,30 @@ exit: ; preds = %for.inc ret void } -define void @mloadstore_i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) { +define void @mloadstore_i32(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @mloadstore_i32 ; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load , * +; CHECK: %[[LOAD1:.*]] = load , ptr ; CHECK-NEXT: %[[MASK:.*]] = icmp ne %[[LOAD1]], -; CHECK-NEXT: %[[GEPA:.*]] = getelementptr i32, i32* %a, -; CHECK-NEXT: %[[MLOAD_PTRS:.*]] = bitcast i32* %[[GEPA]] to * -; CHECK-NEXT: %[[LOAD2:.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* %[[MLOAD_PTRS]], i32 4, %[[MASK]] +; CHECK-NEXT: %[[GEPA:.*]] = getelementptr i32, ptr %a, +; CHECK-NEXT: %[[LOAD2:.*]] = call @llvm.masked.load.nxv4i32.p0(ptr %[[GEPA]], i32 4, %[[MASK]] ; CHECK-NEXT: %[[FADD:.*]] = add %[[LOAD1]], %[[LOAD2]] -; CHECK-NEXT: %[[MSTORE_PTRS:.*]] = bitcast i32* %[[GEPA]] to * -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32( %[[FADD]], * %[[MSTORE_PTRS]], i32 4, %[[MASK]]) +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( %[[FADD]], ptr %[[GEPA]], i32 4, %[[MASK]]) entry: br label %for.body for.body: ; preds = %entry, %for.inc %i.011 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.011 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.011 + %0 = load i32, ptr %arrayidx, align 4 %cmp1 = icmp ne i32 %0, 0 br i1 %cmp1, label %if.then, label %for.inc if.then: ; preds = %for.body - %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %i.011 - %1 = load i32, i32* %arrayidx3, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %i.011 + %1 = load i32, ptr %arrayidx3, align 4 %add = add i32 %0, %1 - store i32 %add, i32* %arrayidx3, align 4 + store i32 %add, ptr %arrayidx3, align 4 br label %for.inc for.inc: ; preds = %for.body, %if.then diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll index 4296b46..4304105 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll @@ -20,7 +20,7 @@ ; CHECK-LABEL: @scalable_load_in_loop ; CHECK-NOT: vector.body -define void @scalable_load_in_loop(i64 %n, * %x, * 
%y) { +define void @scalable_load_in_loop(i64 %n, ptr %x, ptr %y) { entry: br label %for.body @@ -31,8 +31,8 @@ for.body: br i1 %cmp, label %for.inc, label %if.end if.end: - %0 = load , * %y - store %0, * %x + %0 = load , ptr %y + store %0, ptr %x br label %for.inc for.inc: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll index f960164..cf5fdc4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll @@ -5,7 +5,7 @@ target triple = "aarch64-linux-gnu" -define i32 @select_const_i32_from_icmp(i32* nocapture readonly %v, i64 %n) #0 { +define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_const_i32_from_icmp ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 3, i32 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] @@ -47,8 +47,8 @@ entry: for.body: ; preds = %entry, %for.body %0 = phi i64 [ 0, %entry ], [ %6, %for.body ] %1 = phi i32 [ 3, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds i32, i32* %v, i64 %0 - %3 = load i32, i32* %2, align 4 + %2 = getelementptr inbounds i32, ptr %v, i64 %0 + %3 = load i32, ptr %2, align 4 %4 = icmp eq i32 %3, 3 %5 = select i1 %4, i32 %1, i32 7 %6 = add nuw nsw i64 %0, 1 @@ -59,7 +59,7 @@ exit: ; preds = %for.body ret i32 %5 } -define i32 @select_i32_from_icmp(i32* nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 { +define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_i32_from_icmp ; CHECK-VF4IC1: vector.ph: ; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement poison, i32 %a, i32 0 @@ -86,8 +86,8 @@ entry: for.body: ; preds = %entry, %for.body %0 = phi i64 [ 0, %entry ], [ %6, %for.body ] %1 = phi i32 [ %a, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds i32, i32* %v, i64 %0 - %3 = load i32, i32* %2, align 4 + %2 = getelementptr inbounds i32, ptr %v, i64 %0 + %3 = load i32, ptr %2, align 4 %4 = icmp eq i32 %3, 3 %5 = select i1 %4, i32 %1, i32 %b %6 = add nuw nsw i64 %0, 1 @@ -98,7 +98,7 @@ exit: ; preds = %for.body ret i32 %5 } -define i32 @select_const_i32_from_fcmp(float* nocapture readonly %v, i64 %n) #0 { +define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_const_i32_from_fcmp ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 2, i32 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] @@ -118,8 +118,8 @@ entry: for.body: ; preds = %entry, %for.body %0 = phi i64 [ 0, %entry ], [ %6, %for.body ] %1 = phi i32 [ 2, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds float, float* %v, i64 %0 - %3 = load float, float* %2, align 4 + %2 = getelementptr inbounds float, ptr %v, i64 %0 + %3 = load float, ptr %2, align 4 %4 = fcmp fast ueq float %3, 3.0 %5 = select i1 %4, i32 %1, i32 1 %6 = add nuw nsw i64 %0, 1 @@ -130,7 +130,7 @@ exit: ; preds = %for.body ret i32 %5 } -define float @select_const_f32_from_icmp(i32* nocapture readonly %v, i64 %n) #0 { +define float @select_const_f32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_const_f32_from_icmp ; CHECK-VF4IC1-NOT: vector.body ; CHECK-VF4IC4-LABEL: @select_const_f32_from_icmp @@ -141,8 +141,8 @@ entry: for.body: ; preds = %entry, %for.body %0 = phi 
i64 [ 0, %entry ], [ %6, %for.body ] %1 = phi fast float [ 3.0, %entry ], [ %5, %for.body ] - %2 = getelementptr inbounds i32, i32* %v, i64 %0 - %3 = load i32, i32* %2, align 4 + %2 = getelementptr inbounds i32, ptr %v, i64 %0 + %3 = load i32, ptr %2, align 4 %4 = icmp eq i32 %3, 3 %5 = select fast i1 %4, float %1, float 7.0 %6 = add nuw nsw i64 %0, 1 @@ -153,13 +153,13 @@ exit: ; preds = %for.body ret float %5 } -define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src1, i32* noalias nocapture readonly %src2, i64 %n) #0 { +define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @pred_select_const_i32_from_icmp ; CHECK-VF4IC1: vector.body: ; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load ; CHECK-VF4IC1: [[MASK:%.*]] = icmp sgt [[VEC_LOAD]], shufflevector ( insertelement ( poison, i32 35, i32 0), poison, zeroinitializer) -; CHECK-VF4IC1: [[MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0nxv4i32(* {{%.*}}, i32 4, [[MASK]], poison) +; CHECK-VF4IC1: [[MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr {{%.*}}, i32 4, [[MASK]], poison) ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq [[MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 2, i32 0), poison, zeroinitializer) ; CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = select [[VEC_ICMP]], shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer), [[VEC_PHI]] ; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select [[MASK]], [[VEC_SEL_TMP]], [[VEC_PHI]] @@ -176,14 +176,14 @@ entry: for.body: ; preds = %entry, %for.inc %i.013 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] %r.012 = phi i32 [ %r.1, %for.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32, i32* %src1, i64 %i.013 - %0 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %src1, i64 %i.013 + %0 = load i32, ptr %arrayidx, align 4 %cmp1 = icmp sgt i32 %0, 35 br i1 %cmp1, label %if.then, label %for.inc if.then: ; preds = %for.body - %arrayidx2 = getelementptr inbounds i32, i32* %src2, i64 %i.013 - %1 = load i32, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %src2, i64 %i.013 + %1 = load i32, ptr %arrayidx2, align 4 %cmp3 = icmp eq i32 %1, 2 %spec.select = select i1 %cmp3, i32 1, i32 %r.012 br label %for.inc diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll index 1ce996c..36dc30d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll @@ -12,15 +12,15 @@ target triple="aarch64-unknown-linux-gnu" ; CHECK-CPU-NEOVERSE-N2: Found an estimated cost of 4 for VF vscale x 2 For instruction: %add = fadd float %0, %sum.07 ; CHECK-CPU-NEOVERSE-N2: Found an estimated cost of 8 for VF vscale x 4 For instruction: %add = fadd float %0, %sum.07 -define float @fadd_strict32(float* noalias nocapture readonly %a, i64 %n) #0 { +define float @fadd_strict32(ptr noalias nocapture readonly %a, i64 %n) #0 { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds float, float* %a, i64 %iv - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + 
%0 = load float, ptr %arrayidx, align 4 %add = fadd float %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -34,15 +34,15 @@ for.end: ; CHECK: Found an estimated cost of 8 for VF vscale x 2 For instruction: %add = fadd double %0, %sum.07 ; CHECK-CPU-NEOVERSE-N2: Found an estimated cost of 4 for VF vscale x 2 For instruction: %add = fadd double %0, %sum.07 -define double @fadd_strict64(double* noalias nocapture readonly %a, i64 %n) #0 { +define double @fadd_strict64(ptr noalias nocapture readonly %a, i64 %n) #0 { entry: br label %for.body for.body: %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] %sum.07 = phi double [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds double, double* %a, i64 %iv - %0 = load double, double* %arrayidx, align 4 + %arrayidx = getelementptr inbounds double, ptr %a, i64 %iv + %0 = load double, ptr %arrayidx, align 4 %add = fadd double %0, %sum.07 %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll index aa8ed299..bd3f222 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-type-conv.ll @@ -3,7 +3,7 @@ target triple = "aarch64-unknown-linux-gnu" -define void @f16_to_f32(float* noalias nocapture %dst, half* noalias nocapture readonly %src, i64 %N) #0 { +define void @f16_to_f32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @f16_to_f32( ; CHECK: vector.body ; CHECK: %{{.*}} = fpext %{{.*}} to @@ -12,11 +12,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds half, half* %src, i64 %i.07 - %0 = load half, half* %arrayidx, align 2 + %arrayidx = getelementptr inbounds half, ptr %src, i64 %i.07 + %0 = load half, ptr %arrayidx, align 2 %conv = fpext half %0 to float - %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07 - store float %conv, float* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07 + store float %conv, ptr %arrayidx1, align 4 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -26,7 +26,7 @@ for.end: ; preds = %for.body, %entry } -define void @f64_to_f32(float* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %N) #0 { +define void @f64_to_f32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @f64_to_f32( ; CHECK: vector.body ; CHECK: %{{.*}} = fptrunc %{{.*}} to @@ -35,11 +35,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds double, double* %src, i64 %i.07 - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %src, i64 %i.07 + %0 = load double, ptr %arrayidx, align 8 %conv = fptrunc double %0 to float - %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07 - store float %conv, float* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07 + store float %conv, ptr %arrayidx1, align 4 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -49,7 +49,7 @@ for.end: ; preds = %for.body, 
%entry } -define void @f16_to_s8(i8* noalias nocapture %dst, half* noalias nocapture readonly %src, i64 %N) #0 { +define void @f16_to_s8(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @f16_to_s8( ; CHECK: vector.body ; CHECK: %{{.*}} = fptosi %{{.*}} to @@ -58,11 +58,11 @@ entry: for.body: ; preds = %entry, %for.body %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds half, half* %src, i64 %i.08 - %0 = load half, half* %arrayidx, align 2 + %arrayidx = getelementptr inbounds half, ptr %src, i64 %i.08 + %0 = load half, ptr %arrayidx, align 2 %conv1 = fptosi half %0 to i8 - %arrayidx2 = getelementptr inbounds i8, i8* %dst, i64 %i.08 - store i8 %conv1, i8* %arrayidx2, align 1 + %arrayidx2 = getelementptr inbounds i8, ptr %dst, i64 %i.08 + store i8 %conv1, ptr %arrayidx2, align 1 %inc = add nuw nsw i64 %i.08, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -72,7 +72,7 @@ for.end: ; preds = %for.body, %entry } -define void @f32_to_u64(i64* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %N) #0 { +define void @f32_to_u64(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @f32_to_u64( ; CHECK: vector.body ; CHECK: %{{.*}} = fptoui %{{.*}} to @@ -81,11 +81,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, float* %src, i64 %i.07 - %0 = load float, float* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr %src, i64 %i.07 + %0 = load float, ptr %arrayidx, align 4 %conv = fptoui float %0 to i64 - %arrayidx1 = getelementptr inbounds i64, i64* %dst, i64 %i.07 - store i64 %conv, i64* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds i64, ptr %dst, i64 %i.07 + store i64 %conv, ptr %arrayidx1, align 8 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -95,7 +95,7 @@ for.end: ; preds = %for.body, %entry } -define void @s8_to_f32(float* noalias nocapture %dst, i8* noalias nocapture readonly %src, i64 %N) #0 { +define void @s8_to_f32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @s8_to_f32( ; CHECK: vector.body ; CHECK: %{{.*}} = sitofp %{{.*}} to @@ -104,11 +104,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %src, i64 %i.07 - %0 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.07 + %0 = load i8, ptr %arrayidx, align 1 %conv = sitofp i8 %0 to float - %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07 - store float %conv, float* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07 + store float %conv, ptr %arrayidx1, align 4 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -118,7 +118,7 @@ for.end: ; preds = %for.body, %entry } -define void @u16_to_f32(float* noalias nocapture %dst, i16* noalias nocapture readonly %src, i64 %N) #0 { +define void @u16_to_f32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @u16_to_f32( ; CHECK: vector.body ; CHECK: %{{.*}} = uitofp %{{.*}} to @@ -127,11 +127,11 @@ entry: for.body: ; preds = %entry, 
%for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i16, i16* %src, i64 %i.07 - %0 = load i16, i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, ptr %src, i64 %i.07 + %0 = load i16, ptr %arrayidx, align 2 %conv = uitofp i16 %0 to float - %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07 - store float %conv, float* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %dst, i64 %i.07 + store float %conv, ptr %arrayidx1, align 4 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -141,7 +141,7 @@ for.end: ; preds = %for.body, %entry } -define void @u64_to_f16(half* noalias nocapture %dst, i64* noalias nocapture readonly %src, i64 %N) #0 { +define void @u64_to_f16(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @u64_to_f16( ; CHECK: vector.body ; CHECK: %{{.*}} = uitofp %{{.*}} to @@ -150,11 +150,11 @@ entry: for.body: ; preds = %entry, %for.body %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %src, i64 %i.08 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.08 + %0 = load i64, ptr %arrayidx, align 8 %conv1 = uitofp i64 %0 to half - %arrayidx2 = getelementptr inbounds half, half* %dst, i64 %i.08 - store half %conv1, half* %arrayidx2, align 2 + %arrayidx2 = getelementptr inbounds half, ptr %dst, i64 %i.08 + store half %conv1, ptr %arrayidx2, align 2 %inc = add nuw nsw i64 %i.08, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -164,7 +164,7 @@ for.end: ; preds = %for.body, %entry } -define void @s64_to_f16(half* noalias nocapture %dst, i64* noalias nocapture readonly %src, i64 %N) #0 { +define void @s64_to_f16(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @s64_to_f16( ; CHECK: vector.body ; CHECK: %{{.*}} = sitofp %{{.*}} to @@ -173,11 +173,11 @@ entry: for.body: ; preds = %entry, %for.body %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %src, i64 %i.08 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.08 + %0 = load i64, ptr %arrayidx, align 8 %conv1 = sitofp i64 %0 to half - %arrayidx2 = getelementptr inbounds half, half* %dst, i64 %i.08 - store half %conv1, half* %arrayidx2, align 2 + %arrayidx2 = getelementptr inbounds half, ptr %dst, i64 %i.08 + store half %conv1, ptr %arrayidx2, align 2 %inc = add nuw nsw i64 %i.08, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -187,7 +187,7 @@ for.end: ; preds = %for.body, %entry } -define void @s8_to_s32(i32* noalias nocapture %dst, i8* noalias nocapture readonly %src, i64 %N) #0 { +define void @s8_to_s32(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @s8_to_s32( ; CHECK: vector.body ; CHECK: %{{.*}} = sext %{{.*}} to @@ -196,11 +196,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %src, i64 %i.07 - %0 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.07 + %0 = load i8, ptr %arrayidx, align 1 %conv = sext i8 %0 to i32 - %arrayidx1 = getelementptr inbounds i32, i32* %dst, i64 %i.07 - 
store i32 %conv, i32* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %dst, i64 %i.07 + store i32 %conv, ptr %arrayidx1, align 4 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -210,7 +210,7 @@ for.end: ; preds = %for.body, %entry } -define void @u8_to_u16(i16* noalias nocapture %dst, i8* noalias nocapture readonly %src, i64 %N) #0 { +define void @u8_to_u16(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @u8_to_u16( ; CHECK: vector.body ; CHECK: %{{.*}} = zext %{{.*}} to @@ -219,11 +219,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, i8* %src, i64 %i.07 - %0 = load i8, i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, ptr %src, i64 %i.07 + %0 = load i8, ptr %arrayidx, align 1 %conv = zext i8 %0 to i16 - %arrayidx1 = getelementptr inbounds i16, i16* %dst, i64 %i.07 - store i16 %conv, i16* %arrayidx1, align 2 + %arrayidx1 = getelementptr inbounds i16, ptr %dst, i64 %i.07 + store i16 %conv, ptr %arrayidx1, align 2 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 @@ -233,7 +233,7 @@ for.end: ; preds = %for.body, %entry } -define void @s64_to_s8(i8* noalias nocapture %dst, i64* noalias nocapture readonly %src, i64 %N) #0 { +define void @s64_to_s8(ptr noalias nocapture %dst, ptr noalias nocapture readonly %src, i64 %N) #0 { ; CHECK-LABEL: @s64_to_s8( ; CHECK: vector.body ; CHECK: %{{.*}} = trunc %{{.*}} to @@ -242,11 +242,11 @@ entry: for.body: ; preds = %entry, %for.body %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %src, i64 %i.07 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %src, i64 %i.07 + %0 = load i64, ptr %arrayidx, align 8 %conv = trunc i64 %0 to i8 - %arrayidx1 = getelementptr inbounds i8, i8* %dst, i64 %i.07 - store i8 %conv, i8* %arrayidx1, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %dst, i64 %i.07 + store i8 %conv, ptr %arrayidx1, align 1 %inc = add nuw nsw i64 %i.07, 1 %exitcond.not = icmp eq i64 %inc, %N br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll index a40ff86..34e2f34 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll @@ -16,13 +16,13 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" -define void @vector_reverse_mask_nxv4i1(double* %a, double* %cond, i64 %N) #0 { +define void @vector_reverse_mask_nxv4i1(ptr %a, ptr %cond, i64 %N) #0 { ; CHECK-LABEL: vector.body: ; CHECK: %[[REVERSE6:.*]] = call @llvm.experimental.vector.reverse.nxv4i1( %{{.*}}) -; CHECK: %[[WIDEMSKLOAD:.*]] = call @llvm.masked.load.nxv4f64.p0nxv4f64(* %{{.*}}, i32 8, %[[REVERSE6]], poison) +; CHECK: %[[WIDEMSKLOAD:.*]] = call @llvm.masked.load.nxv4f64.p0(ptr %{{.*}}, i32 8, %[[REVERSE6]], poison) ; CHECK-NEXT: %[[FADD:.*]] = fadd %[[WIDEMSKLOAD]] ; CHECK: %[[REVERSE9:.*]] = call @llvm.experimental.vector.reverse.nxv4i1( %{{.*}}) -; CHECK: call void @llvm.masked.store.nxv4f64.p0nxv4f64( %[[FADD]], * %{{.*}}, 
i32 8, %[[REVERSE9]] +; CHECK: call void @llvm.masked.store.nxv4f64.p0( %[[FADD]], ptr %{{.*}}, i32 8, %[[REVERSE9]] entry: %cmp7 = icmp sgt i64 %N, 0 @@ -34,16 +34,16 @@ for.cond.cleanup: ; preds = %for.cond.cleanup, % for.body: ; preds = %for.body, %entry %i.08.in = phi i64 [ %i.08, %for.inc ], [ %N, %entry ] %i.08 = add nsw i64 %i.08.in, -1 - %arrayidx = getelementptr inbounds double, double* %cond, i64 %i.08 - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %cond, i64 %i.08 + %0 = load double, ptr %arrayidx, align 8 %tobool = fcmp une double %0, 0.000000e+00 br i1 %tobool, label %if.then, label %for.inc if.then: ; preds = %for.body - %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.08 - %1 = load double, double* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds double, ptr %a, i64 %i.08 + %1 = load double, ptr %arrayidx1, align 8 %add = fadd double %1, 1.000000e+00 - store double %add, double* %arrayidx1, align 8 + store double %add, ptr %arrayidx1, align 8 br label %for.inc for.inc: ; preds = %for.body, %if.then diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll index cc51c14..003a3f9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -8,7 +8,7 @@ ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -S \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s -define void @vector_reverse_f64(i64 %N, double* noalias %a, double* noalias %b) #0{ +define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-LABEL: @vector_reverse_f64( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0 @@ -28,23 +28,21 @@ define void @vector_reverse_f64(i64 %N, double* noalias %a, double* noalias %b) ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[INDEX]], -1 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], [[N]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP8:%.*]] = shl i32 [[TMP7]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = sub i32 1, [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP12]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP10]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP11]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = fadd [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 1.000000e+00, i32 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP16:%.*]] = shl i32 [[TMP15]], 3 ; CHECK-NEXT: [[TMP17:%.*]] = sub i32 1, [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* 
[[TMP13]], i64 [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP19]] to * -; CHECK-NEXT: store [[TMP14]], * [[TMP20]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP13]], i64 [[TMP18]] +; CHECK-NEXT: store [[TMP14]], ptr [[TMP19]], align 8 ; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP22:%.*]] = shl i64 [[TMP21]], 3 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]] @@ -63,11 +61,11 @@ define void @vector_reverse_f64(i64 %N, double* noalias %a, double* noalias %b) ; CHECK: for.body: ; CHECK-NEXT: [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_08]] = add nsw i64 [[I_08_IN]], -1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]] +; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP24]], 1.000000e+00 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[I_08]] -; CHECK-NEXT: store double [[ADD]], double* [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]] +; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; @@ -81,21 +79,21 @@ for.cond.cleanup: ; preds = %for.body for.body: ; preds = %entry, %for.body %i.08.in = phi i64 [ %i.08, %for.body ], [ %N, %entry ] %i.08 = add nsw i64 %i.08.in, -1 - %arrayidx = getelementptr inbounds double, double* %b, i64 %i.08 - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %b, i64 %i.08 + %0 = load double, ptr %arrayidx, align 8 %add = fadd double %0, 1.000000e+00 - %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.08 - store double %add, double* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds double, ptr %a, i64 %i.08 + store double %add, ptr %arrayidx1, align 8 %cmp = icmp sgt i64 %i.08.in, 1 br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0 } -define void @vector_reverse_i64(i64 %N, i64* %a, i64* %b) #0 { +define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-LABEL: @vector_reverse_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A2:%.*]] = ptrtoint i64* [[A:%.*]] to i64 -; CHECK-NEXT: [[B1:%.*]] = ptrtoint i64* [[B:%.*]] to i64 +; CHECK-NEXT: [[A2:%.*]] = ptrtoint ptr [[A:%.*]] to i64 +; CHECK-NEXT: [[B1:%.*]] = ptrtoint ptr [[B:%.*]] to i64 ; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i64 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: @@ -122,23 +120,21 @@ define void @vector_reverse_i64(i64 %N, i64* %a, i64* %b) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[INDEX]], -1 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], [[N]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP14:%.*]] = shl i32 [[TMP13]], 3 
; CHECK-NEXT: [[TMP15:%.*]] = sub i32 1, [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, i64* [[TMP12]], i64 [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64* [[TMP17]] to * -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , * [[TMP18]], align 8 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP17]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP22:%.*]] = shl i32 [[TMP21]], 3 ; CHECK-NEXT: [[TMP23:%.*]] = sub i32 1, [[TMP22]] ; CHECK-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, i64* [[TMP19]], i64 [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = bitcast i64* [[TMP25]] to * -; CHECK-NEXT: store [[TMP20]], * [[TMP26]], align 8 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i64 [[TMP24]] +; CHECK-NEXT: store [[TMP20]], ptr [[TMP25]], align 8 ; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP28:%.*]] = shl i64 [[TMP27]], 3 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP28]] @@ -157,11 +153,11 @@ define void @vector_reverse_i64(i64 %N, i64* %a, i64* %b) #0 { ; CHECK: for.body: ; CHECK-NEXT: [[I_09_IN:%.*]] = phi i64 [ [[I_09:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_09]] = add nsw i64 [[I_09_IN]], -1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[I_09]] -; CHECK-NEXT: [[TMP30:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_09]] +; CHECK-NEXT: [[TMP30:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP30]], 1 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I_09]] -; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_09]] +; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX2]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_09_IN]], 1 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP6:![0-9]+]] ; @@ -175,11 +171,11 @@ for.cond.cleanup: ; preds = %for.body for.body: ; preds = %entry, %for.body %i.09.in = phi i64 [ %i.09, %for.body ], [ %N, %entry ] %i.09 = add nsw i64 %i.09.in, -1 - %arrayidx = getelementptr inbounds i64, i64* %b, i64 %i.09 - %0 = load i64, i64* %arrayidx, align 8 + %arrayidx = getelementptr inbounds i64, ptr %b, i64 %i.09 + %0 = load i64, ptr %arrayidx, align 8 %add = add i64 %0, 1 - %arrayidx2 = getelementptr inbounds i64, i64* %a, i64 %i.09 - store i64 %add, i64* %arrayidx2, align 8 + %arrayidx2 = getelementptr inbounds i64, ptr %a, i64 %i.09 + store i64 %add, ptr %arrayidx2, align 8 %cmp = icmp sgt i64 %i.09.in, 1 br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0 } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll index 3622e35..d9acd499 100644 --- 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll @@ -2,7 +2,7 @@ target triple = "aarch64-unknown-linux-gnu" -define void @widen_extractvalue(i64* %dst, {i64, i64} %sv) #0 { +define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 { ; CHECK-LABEL: @widen_extractvalue( ; CHECK: vector.body: ; CHECK: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV:%.*]], 0 @@ -19,9 +19,9 @@ loop.body: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ] %a = extractvalue { i64, i64 } %sv, 0 %b = extractvalue { i64, i64 } %sv, 1 - %addr = getelementptr i64, i64* %dst, i32 %iv + %addr = getelementptr i64, ptr %dst, i32 %iv %add = add i64 %a, %b - store i64 %add, i64* %addr + store i64 %add, ptr %addr %iv.next = add nsw i32 %iv, 1 %cond = icmp ne i32 %iv.next, 0 br i1 %cond, label %loop.body, label %exit, !llvm.loop !0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll index 837d4cc..7272a69 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" ; CHECK-LABEL: test0 -define void @test0(i16* noalias %M3) { +define void @test0(ptr noalias %M3) { entry: br label %if.then1165.us @@ -14,8 +14,8 @@ if.then1165.us: ; preds = %if.then1165.us, %en %add1178.us = add nsw i32 %conv1177.us, undef %conv1179.us = trunc i32 %add1178.us to i16 %idxprom1181.us = ashr exact i64 undef, 32 - %arrayidx1185.us = getelementptr inbounds i16, i16* %M3, i64 %idxprom1181.us - store i16 %conv1179.us, i16* %arrayidx1185.us, align 2 + %arrayidx1185.us = getelementptr inbounds i16, ptr %M3, i64 %idxprom1181.us + store i16 %conv1179.us, ptr %arrayidx1185.us, align 2 %indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1 %exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16 br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us @@ -25,19 +25,19 @@ for.inc1286.loopexit: ; preds = %if.then1165.us } ; CHECK-LABEL: test1 -define void @test1(i16* noalias %M3) { +define void @test1(ptr noalias %M3) { entry: br label %if.then1165.us if.then1165.us: ; preds = %if.then1165.us, %entry %indvars.iv1783 = phi i64 [ 0, %entry ], [ %indvars.iv.next1784, %if.then1165.us ] - %fptr = load i32, i32* undef, align 4 + %fptr = load i32, ptr undef, align 4 %conv1177.us = zext i16 undef to i32 %add1178.us = add nsw i32 %conv1177.us, %fptr %conv1179.us = trunc i32 %add1178.us to i16 %idxprom1181.us = ashr exact i64 undef, 32 - %arrayidx1185.us = getelementptr inbounds i16, i16* %M3, i64 %idxprom1181.us - store i16 %conv1179.us, i16* %arrayidx1185.us, align 2 + %arrayidx1185.us = getelementptr inbounds i16, ptr %M3, i64 %idxprom1181.us + store i16 %conv1179.us, ptr %arrayidx1185.us, align 2 %indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1 %exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16 br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/unsafe-vf-hint-remark.ll b/llvm/test/Transforms/LoopVectorize/AArch64/unsafe-vf-hint-remark.ll index fb12e9c..9b635bf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/unsafe-vf-hint-remark.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/unsafe-vf-hint-remark.ll @@ -9,24 +9,24 @@ ; CHECK: LV: Selecting 
VF: 2. ; CHECK-LABEL: @test ; CHECK: <2 x i64> -define void @test(i64* nocapture %a, i64* nocapture readonly %b) { +define void @test(ptr nocapture %a, ptr nocapture readonly %b) { entry: br label %loop.header loop.header: %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] - %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv - %0 = load i64, i64* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 %iv - %1 = load i64, i64* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv + %0 = load i64, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i64, ptr %b, i64 %iv + %1 = load i64, ptr %arrayidx2, align 4 %add = add nsw i64 %1, %0 %2 = add nuw nsw i64 %iv, 16 - %arrayidx5 = getelementptr inbounds i64, i64* %a, i64 %2 + %arrayidx5 = getelementptr inbounds i64, ptr %a, i64 %2 %c = icmp eq i64 %1, 120 br i1 %c, label %then, label %latch then: - store i64 %add, i64* %arrayidx5, align 4 + store i64 %add, ptr %arrayidx5, align 4 br label %latch latch: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll index 6cfdafe..c1e27ca 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "arm64-apple-darwin" declare float @expf(float) nounwind readnone -define void @expf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @expf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @expf_v4f32( ; CHECK: call <4 x float> @_simd_exp_f4( ; CHECK: ret void @@ -14,11 +14,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @expf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -28,7 +28,7 @@ for.end: } declare double @exp(double) nounwind readnone -define void @exp_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @exp_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @exp_v2f64( ; CHECK: call <2 x double> @_simd_exp_d2( ; CHECK: ret void @@ -38,11 +38,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @exp(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -52,7 +52,7 @@ for.end: } declare float @acosf(float) nounwind readnone -define void @acos_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void 
@acos_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @acos_v4f32( ; CHECK: call <4 x float> @_simd_acos_f4( ; CHECK: ret void @@ -62,11 +62,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @acosf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -76,7 +76,7 @@ for.end: } declare double @acos(double) nounwind readnone -define void @acos_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @acos_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @acos_v2f64( ; CHECK: call <2 x double> @_simd_acos_d2( ; CHECK: ret void @@ -86,11 +86,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @acos(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -100,7 +100,7 @@ for.end: } declare float @asinf(float) nounwind readnone -define void @asinf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @asinf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @asinf_v4f32( ; CHECK: call <4 x float> @_simd_asin_f4( ; CHECK: ret void @@ -110,11 +110,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @asinf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -124,7 +124,7 @@ for.end: } declare double @asin(double) nounwind readnone -define void @asin_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @asin_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @asin_v2f64( ; CHECK: call <2 x double> @_simd_asin_d2( ; CHECK: ret void @@ -134,11 +134,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @asin(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr 
%gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -148,7 +148,7 @@ for.end: } declare float @atanf(float) nounwind readnone -define void @atanf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @atanf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @atanf_v4f32( ; CHECK: call <4 x float> @_simd_atan_f4( ; CHECK: ret void @@ -158,11 +158,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @atanf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -172,7 +172,7 @@ for.end: } declare double @atan(double) nounwind readnone -define void @atan_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @atan_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @atan_v2f64( ; CHECK: call <2 x double> @_simd_atan_d2( ; CHECK: ret void @@ -182,11 +182,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @atan(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -196,7 +196,7 @@ for.end: } declare float @atan2f(float) nounwind readnone -define void @atan2f_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @atan2f_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @atan2f_v4f32( ; CHECK: call <4 x float> @_simd_atan2_f4( ; CHECK: ret void @@ -206,11 +206,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @atan2f(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -220,7 +220,7 @@ for.end: } declare double @atan2(double) nounwind readnone -define void @atan2_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @atan2_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @atan2_v2f64( ; CHECK: call <2 x double> @_simd_atan2_d2( ; CHECK: ret void @@ -230,11 +230,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = 
getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @atan2(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -244,7 +244,7 @@ for.end: } declare float @cosf(float) nounwind readnone -define void @cosf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @cosf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @cosf_v4f32( ; CHECK: call <4 x float> @_simd_cos_f4( ; CHECK: ret void @@ -254,11 +254,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @cosf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -268,7 +268,7 @@ for.end: } declare double @cos(double) nounwind readnone -define void @cos_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @cos_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @cos_v2f64( ; CHECK: call <2 x double> @_simd_cos_d2( ; CHECK: ret void @@ -278,11 +278,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @cos(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -292,7 +292,7 @@ for.end: } declare float @cbrtf(float) nounwind readnone -define void @cbrtf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @cbrtf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @cbrtf_v4f32( ; CHECK: call <4 x float> @_simd_cbrt_f4( ; CHECK: ret void @@ -302,11 +302,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @cbrtf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -316,7 +316,7 @@ for.end: } declare double @cbrt(double) nounwind readnone -define void @cbrt_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @cbrt_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; 
CHECK-LABEL: @cbrt_v2f64( ; CHECK: call <2 x double> @_simd_cbrt_d2( ; CHECK: ret void @@ -326,11 +326,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @cbrt(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -340,7 +340,7 @@ for.end: } declare float @erff(float) nounwind readnone -define void @erff_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @erff_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @erff_v4f32( ; CHECK: call <4 x float> @_simd_erf_f4( ; CHECK: ret void @@ -350,11 +350,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @erff(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -364,7 +364,7 @@ for.end: } declare double @erf(double) nounwind readnone -define void @erf_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @erf_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @erf_v2f64( ; CHECK: call <2 x double> @_simd_erf_d2( ; CHECK: ret void @@ -374,11 +374,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @erf(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -388,7 +388,7 @@ for.end: } declare float @powf(float) nounwind readnone -define void @powf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @powf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @powf_v4f32( ; CHECK: call <4 x float> @_simd_pow_f4( ; CHECK: ret void @@ -398,11 +398,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @powf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 
%iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -412,7 +412,7 @@ for.end: } declare double @pow(double) nounwind readnone -define void @pow_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @pow_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @pow_v2f64( ; CHECK: call <2 x double> @_simd_pow_d2( ; CHECK: ret void @@ -422,11 +422,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @pow(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -436,7 +436,7 @@ for.end: } declare float @sinhf(float) nounwind readnone -define void @sinhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @sinhf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @sinhf_v4f32( ; CHECK: call <4 x float> @_simd_sinh_f4( ; CHECK: ret void @@ -446,11 +446,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @sinhf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -460,7 +460,7 @@ for.end: } declare double @sinh(double) nounwind readnone -define void @sinh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @sinh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @sinh_v2f64( ; CHECK: call <2 x double> @_simd_sinh_d2( ; CHECK: ret void @@ -470,11 +470,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @sinh(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -484,7 +484,7 @@ for.end: } declare float @coshf(float) nounwind readnone -define void @coshf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @coshf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @coshf_v4f32( ; CHECK: call <4 x float> @_simd_cosh_f4( ; CHECK: ret void @@ -494,11 +494,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 
%call = tail call float @coshf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -508,7 +508,7 @@ for.end: } declare double @cosh(double) nounwind readnone -define void @cosh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @cosh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @cosh_v2f64( ; CHECK: call <2 x double> @_simd_cosh_d2( ; CHECK: ret void @@ -518,11 +518,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @cosh(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -532,7 +532,7 @@ for.end: } declare float @tanhf(float) nounwind readnone -define void @tanhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @tanhf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @tanhf_v4f32( ; CHECK: call <4 x float> @_simd_tanh_f4( ; CHECK: ret void @@ -542,11 +542,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @tanhf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -556,7 +556,7 @@ for.end: } declare double @tanh(double) nounwind readnone -define void @tanh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @tanh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @tanh_v2f64( ; CHECK: call <2 x double> @_simd_tanh_d2( ; CHECK: ret void @@ -566,11 +566,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @tanh(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -580,7 +580,7 @@ for.end: } declare float @asinhf(float) nounwind readnone -define void @asinhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @asinhf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @asinhf_v4f32( ; CHECK: call <4 x float> @_simd_asinh_f4( ; 
CHECK: ret void @@ -590,11 +590,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @asinhf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -604,7 +604,7 @@ for.end: } declare double @asinh(double) nounwind readnone -define void @asinh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @asinh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @asinh_v2f64( ; CHECK: call <2 x double> @_simd_asinh_d2( ; CHECK: ret void @@ -614,11 +614,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @asinh(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -628,7 +628,7 @@ for.end: } declare float @acoshf(float) nounwind readnone -define void @acoshf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @acoshf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @acoshf_v4f32( ; CHECK: call <4 x float> @_simd_acosh_f4( ; CHECK: ret void @@ -638,11 +638,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @acoshf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -652,7 +652,7 @@ for.end: } declare double @acosh(double) nounwind readnone -define void @acosh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @acosh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @acosh_v2f64( ; CHECK: call <2 x double> @_simd_acosh_d2( ; CHECK: ret void @@ -662,11 +662,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @acosh(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label 
%for.end, label %for.body @@ -676,7 +676,7 @@ for.end: } declare float @atanhf(float) nounwind readnone -define void @atanhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +define void @atanhf_v4f32(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @atanhf_v4f32( ; CHECK: call <4 x float> @_simd_atanh_f4( ; CHECK: ret void @@ -686,11 +686,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds float, float* %y, i64 %iv - %lv = load float, float* %gep.y, align 4 + %gep.y = getelementptr inbounds float, ptr %y, i64 %iv + %lv = load float, ptr %gep.y, align 4 %call = tail call float @atanhf(float %lv) - %gep.x = getelementptr inbounds float, float* %x, i64 %iv - store float %call, float* %gep.x, align 4 + %gep.x = getelementptr inbounds float, ptr %x, i64 %iv + store float %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body @@ -700,7 +700,7 @@ for.end: } declare double @atanh(double) nounwind readnone -define void @atanh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +define void @atanh_v2f64(i64 %n, ptr noalias %y, ptr noalias %x) { ; CHECK-LABEL: @atanh_v2f64( ; CHECK: call <2 x double> @_simd_atanh_d2( ; CHECK: ret void @@ -710,11 +710,11 @@ entry: for.body: %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] - %gep.y = getelementptr inbounds double, double* %y, i64 %iv - %lv = load double, double* %gep.y, align 4 + %gep.y = getelementptr inbounds double, ptr %y, i64 %iv + %lv = load double, ptr %gep.y, align 4 %call = tail call double @atanh(double %lv) - %gep.x = getelementptr inbounds double, double* %x, i64 %iv - store double %call, double* %gep.x, align 4 + %gep.x = getelementptr inbounds double, ptr %x, i64 %iv + store double %call, ptr %gep.x, align 4 %iv.next = add i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, %n br i1 %exitcond, label %for.end, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll index 26511c1..ab50b32 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll @@ -17,7 +17,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" -define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond, i64 %N) #0 { +define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-LABEL: @vector_reverse_mask_v4i1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0 @@ -33,34 +33,28 @@ define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond, ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = xor i64 [[INDEX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[N]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, double* [[COND:%.*]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -3 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 -3 +; 
CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP3]], align 8
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>*
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 -4
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i64 -3
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP6]], align 8
 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x double> [[WIDE_LOAD1]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: [[TMP8:%.*]] = fcmp une <4 x double> [[REVERSE]], zeroinitializer
 ; CHECK-NEXT: [[TMP9:%.*]] = fcmp une <4 x double> [[REVERSE2]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, double* [[A:%.*]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, double* [[TMP10]], i64 -3
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i64 -3
 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP12]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP10]], i64 -4
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, double* [[TMP13]], i64 -3
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP11]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 -4
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP13]], i64 -3
 ; CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP9]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
-; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP15]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
+; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP14]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
 ; CHECK-NEXT: [[TMP16:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
 ; CHECK-NEXT: [[TMP17:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
-; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP16]], <4 x double>* [[TMP18]], i32 8, <4 x i1> [[REVERSE3]])
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
-; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP17]], <4 x double>* [[TMP19]], i32 8, <4 x i1> [[REVERSE5]])
+; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP16]], ptr [[TMP11]], i32 8, <4 x i1> [[REVERSE3]])
+; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP17]], ptr [[TMP14]], i32 8, <4 x i1> [[REVERSE5]])
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -77,15 +71,15 @@ define void @vector_reverse_mask_v4i1(double* noalias %a, double* noalias %cond, ; CHECK: for.body: ; CHECK-NEXT: [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_08]] = add nsw i64 [[I_08_IN]], -1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[COND]], i64 [[I_08]] -; CHECK-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[COND]], i64 [[I_08]] +; CHECK-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une double [[TMP21]], 0.000000e+00 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[I_08]] -; CHECK-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]] +; CHECK-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP22]], 1.000000e+00 -; CHECK-NEXT: store double [[ADD]], double* [[ARRAYIDX1]], align 8 +; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1 @@ -102,16 +96,16 @@ for.cond.cleanup: ; preds = %for.cond.cleanup, % for.body: ; preds = %for.body, %entry %i.08.in = phi i64 [ %i.08, %for.inc ], [ %N, %entry ] %i.08 = add nsw i64 %i.08.in, -1 - %arrayidx = getelementptr inbounds double, double* %cond, i64 %i.08 - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %cond, i64 %i.08 + %0 = load double, ptr %arrayidx, align 8 %tobool = fcmp une double %0, 0.000000e+00 br i1 %tobool, label %if.then, label %for.inc if.then: ; preds = %for.body - %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.08 - %1 = load double, double* %arrayidx1, align 8 + %arrayidx1 = getelementptr inbounds double, ptr %a, i64 %i.08 + %1 = load double, ptr %arrayidx1, align 8 %add = fadd double %1, 1.000000e+00 - store double %add, double* %arrayidx1, align 8 + store double %add, ptr %arrayidx1, align 8 br label %for.inc for.inc: ; preds = %for.body, %if.then diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/divergent-runtime-check.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/divergent-runtime-check.ll index cf7247f..b1f2ccd 100644 --- a/llvm/test/Transforms/LoopVectorize/AMDGPU/divergent-runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/divergent-runtime-check.ll @@ -6,17 +6,17 @@ ; GCN-NOT: store <2 x half> ; REMARK: remark: :0:0: loop not vectorized: runtime pointer checks needed. 
Not enabled for divergent target -define amdgpu_kernel void @runtime_check_divergent_target(half addrspace(1)* nocapture %a, half addrspace(1)* nocapture %b) #0 { +define amdgpu_kernel void @runtime_check_divergent_target(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture %b) #0 { entry: br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds half, half addrspace(1)* %b, i64 %indvars.iv - %load = load half, half addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds half, ptr addrspace(1) %b, i64 %indvars.iv + %load = load half, ptr addrspace(1) %arrayidx, align 4 %mul = fmul half %load, 3.0 - %arrayidx2 = getelementptr inbounds half, half addrspace(1)* %a, i64 %indvars.iv - store half %mul, half addrspace(1)* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds half, ptr addrspace(1) %a, i64 %indvars.iv + store half %mul, ptr addrspace(1) %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 1024 diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-fp32.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-fp32.ll index 26da04e..e43a14b 100644 --- a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-fp32.ll +++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-fp32.ll @@ -4,15 +4,15 @@ ; GFX90A-COUNT-2: load <2 x float> ; GFX90A-COUNT-2: fadd fast <2 x float> -define float @vectorize_v2f32_loop(float addrspace(1)* noalias %s) { +define float @vectorize_v2f32_loop(ptr addrspace(1) noalias %s) { entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %q.04 = phi float [ 0.0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds float, float addrspace(1)* %s, i64 %indvars.iv - %load = load float, float addrspace(1)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %s, i64 %indvars.iv + %load = load float, ptr addrspace(1) %arrayidx, align 4 %add = fadd fast float %q.04, %load %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 256 diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll index 626e2f5..b29abbd 100644 --- a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll +++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll @@ -3,7 +3,7 @@ ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s -passes=loop-vectorize,dce,instcombine -S | FileCheck -check-prefix=VI %s ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s -passes=loop-vectorize,dce,instcombine -S | FileCheck -check-prefix=CI %s -define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) { +define half @vectorize_v2f16_loop(ptr addrspace(1) noalias %s) { ; GFX9-LABEL: @vectorize_v2f16_loop( ; GFX9-NEXT: entry: ; GFX9-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -13,12 +13,10 @@ define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) { ; GFX9-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; GFX9-NEXT: [[VEC_PHI:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; GFX9-NEXT: [[VEC_PHI1:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; GFX9-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, half addrspace(1)* 
[[S:%.*]], i64 [[INDEX]] -; GFX9-NEXT: [[TMP1:%.*]] = bitcast half addrspace(1)* [[TMP0]] to <2 x half> addrspace(1)* -; GFX9-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP1]], align 2 -; GFX9-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, half addrspace(1)* [[TMP0]], i64 2 -; GFX9-NEXT: [[TMP3:%.*]] = bitcast half addrspace(1)* [[TMP2]] to <2 x half> addrspace(1)* -; GFX9-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP3]], align 2 +; GFX9-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDEX]] +; GFX9-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 2 +; GFX9-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP0]], i64 2 +; GFX9-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP2]], align 2 ; GFX9-NEXT: [[TMP4]] = fadd fast <2 x half> [[VEC_PHI]], [[WIDE_LOAD]] ; GFX9-NEXT: [[TMP5]] = fadd fast <2 x half> [[VEC_PHI1]], [[WIDE_LOAD2]] ; GFX9-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -45,12 +43,10 @@ define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) { ; VI-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VI-NEXT: [[VEC_PHI:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; VI-NEXT: [[VEC_PHI1:%.*]] = phi <2 x half> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; VI-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, half addrspace(1)* [[S:%.*]], i64 [[INDEX]] -; VI-NEXT: [[TMP1:%.*]] = bitcast half addrspace(1)* [[TMP0]] to <2 x half> addrspace(1)* -; VI-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP1]], align 2 -; VI-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, half addrspace(1)* [[TMP0]], i64 2 -; VI-NEXT: [[TMP3:%.*]] = bitcast half addrspace(1)* [[TMP2]] to <2 x half> addrspace(1)* -; VI-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x half>, <2 x half> addrspace(1)* [[TMP3]], align 2 +; VI-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDEX]] +; VI-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP0]], align 2 +; VI-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[TMP0]], i64 2 +; VI-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[TMP2]], align 2 ; VI-NEXT: [[TMP4]] = fadd fast <2 x half> [[VEC_PHI]], [[WIDE_LOAD]] ; VI-NEXT: [[TMP5]] = fadd fast <2 x half> [[VEC_PHI1]], [[WIDE_LOAD2]] ; VI-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -74,8 +70,8 @@ define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) { ; CI: for.body: ; CI-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CI-NEXT: [[Q_04:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, half addrspace(1)* [[S:%.*]], i64 [[INDVARS_IV]] -; CI-NEXT: [[TMP0:%.*]] = load half, half addrspace(1)* [[ARRAYIDX]], align 2 +; CI-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr addrspace(1) [[S:%.*]], i64 [[INDVARS_IV]] +; CI-NEXT: [[TMP0:%.*]] = load half, ptr addrspace(1) [[ARRAYIDX]], align 2 ; CI-NEXT: [[ADD]] = fadd fast half [[Q_04]], [[TMP0]] ; CI-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CI-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 256 @@ -89,8 +85,8 @@ entry: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %q.04 = phi 
half [ 0.0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds half, half addrspace(1)* %s, i64 %indvars.iv - %0 = load half, half addrspace(1)* %arrayidx, align 2 + %arrayidx = getelementptr inbounds half, ptr addrspace(1) %s, i64 %indvars.iv + %0 = load half, ptr addrspace(1) %arrayidx, align 2 %add = fadd fast half %q.04, %0 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 256 diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll index 528d06f..ac67257 100644 --- a/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll +++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll @@ -7,17 +7,17 @@ ; CHECK: store i32 ; CHECK-NOT: store i32 ; CHECK: ret -define amdgpu_kernel void @small_loop(i32* nocapture %inArray, i32 %size) nounwind { +define amdgpu_kernel void @small_loop(ptr nocapture %inArray, i32 %size) nounwind { entry: %0 = icmp sgt i32 %size, 0 br i1 %0, label %loop, label %exit loop: ; preds = %entry, %loop %iv = phi i32 [ %iv1, %loop ], [ 0, %entry ] - %1 = getelementptr inbounds i32, i32* %inArray, i32 %iv - %2 = load i32, i32* %1, align 4 + %1 = getelementptr inbounds i32, ptr %inArray, i32 %iv + %2 = load i32, ptr %1, align 4 %3 = add nsw i32 %2, 6 - store i32 %3, i32* %1, align 4 + store i32 %3, ptr %1, align 4 %iv1 = add i32 %iv, 1 ; %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %cond = icmp eq i32 %iv1, %size diff --git a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll index 151919f..aee0aa4 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll @@ -16,7 +16,7 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" ; Integer loops are always vectorizeable ; CHECK: Checking a loop in 'sumi' ; CHECK: We can vectorize this loop! -define void @sumi(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %N) { +define void @sumi(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -26,13 +26,13 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.06 - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %B, i32 %i.06 - %1 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.06 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %B, i32 %i.06 + %1 = load i32, ptr %arrayidx1, align 4 %mul = mul nsw i32 %1, %0 - %arrayidx2 = getelementptr inbounds i32, i32* %C, i32 %i.06 - store i32 %mul, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %C, i32 %i.06 + store i32 %mul, ptr %arrayidx2, align 4 %inc = add nuw nsw i32 %i.06, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -51,7 +51,7 @@ for.end: ; preds = %for.end.loopexit, % ; MVE: We can vectorize this loop! ; DARWIN: Checking a loop in 'sumf' ; DARWIN: We can vectorize this loop! 
-define void @sumf(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) { +define void @sumf(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -61,13 +61,13 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds float, float* %A, i32 %i.06 - %0 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.06 - %1 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.06 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.06 + %1 = load float, ptr %arrayidx1, align 4 %mul = fmul float %0, %1 - %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.06 - store float %mul, float* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %C, i32 %i.06 + store float %mul, ptr %arrayidx2, align 4 %inc = add nuw nsw i32 %i.06, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -82,7 +82,7 @@ for.end: ; preds = %for.end.loopexit, % ; Integer loops are always vectorizeable ; CHECK: Checking a loop in 'redi' ; CHECK: We can vectorize this loop! -define i32 @redi(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) { +define i32 @redi(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -93,10 +93,10 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %Red.06 = phi i32 [ %add, %for.body ], [ undef, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.07 - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %b, i32 %i.07 - %1 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.07 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i32 %i.07 + %1 = load i32, ptr %arrayidx1, align 4 %mul = mul nsw i32 %1, %0 %add = add nsw i32 %mul, %Red.06 %inc = add nuw nsw i32 %i.07, 1 @@ -119,7 +119,7 @@ for.end: ; preds = %for.end.loopexit, % ; MVE: We can vectorize this loop! ; DARWIN: Checking a loop in 'redf' ; DARWIN: We can vectorize this loop! 
-define float @redf(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i32 %N) { +define float @redf(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -130,10 +130,10 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %Red.06 = phi float [ %add, %for.body ], [ undef, %for.body.preheader ] - %arrayidx = getelementptr inbounds float, float* %a, i32 %i.07 - %0 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %b, i32 %i.07 - %1 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i32 %i.07 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %b, i32 %i.07 + %1 = load float, ptr %arrayidx1, align 4 %mul = fmul float %0, %1 %add = fadd float %Red.06, %mul %inc = add nuw nsw i32 %i.07, 1 @@ -154,21 +154,21 @@ for.end: ; preds = %for.end.loopexit, % ; LINUX: Potentially unsafe FP op prevents vectorization ; DARWIN: Checking a loop in 'fabs' ; DARWIN: We can vectorize this loop! -define void @fabs(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) { +define void @fabs(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) { entry: %cmp10 = icmp eq i32 %N, 0 br i1 %cmp10, label %for.end, label %for.body for.body: ; preds = %entry, %for.body %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, float* %A, i32 %i.011 - %0 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.011 - %1 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.011 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.011 + %1 = load float, ptr %arrayidx1, align 4 %fabsf = tail call float @fabsf(float %1) #1 %conv3 = fmul float %0, %fabsf - %arrayidx4 = getelementptr inbounds float, float* %C, i32 %i.011 - store float %conv3, float* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %C, i32 %i.011 + store float %conv3, ptr %arrayidx4, align 4 %inc = add nuw nsw i32 %i.011, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.end, label %for.body @@ -180,7 +180,7 @@ for.end: ; preds = %for.body, %entry ; Integer loops are always vectorizeable ; CHECK: Checking a loop in 'sumi_fast' ; CHECK: We can vectorize this loop! 
-define void @sumi_fast(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %N) { +define void @sumi_fast(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -190,13 +190,13 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.06 - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %B, i32 %i.06 - %1 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.06 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %B, i32 %i.06 + %1 = load i32, ptr %arrayidx1, align 4 %mul = mul nsw i32 %1, %0 - %arrayidx2 = getelementptr inbounds i32, i32* %C, i32 %i.06 - store i32 %mul, i32* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %C, i32 %i.06 + store i32 %mul, ptr %arrayidx2, align 4 %inc = add nuw nsw i32 %i.06, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -211,7 +211,7 @@ for.end: ; preds = %for.end.loopexit, % ; Floating-point loops can be vectorizeable with fast-math ; CHECK: Checking a loop in 'sumf_fast' ; CHECK: We can vectorize this loop! -define void @sumf_fast(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) { +define void @sumf_fast(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -221,13 +221,13 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %arrayidx = getelementptr inbounds float, float* %A, i32 %i.06 - %0 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.06 - %1 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.06 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.06 + %1 = load float, ptr %arrayidx1, align 4 %mul = fmul fast float %1, %0 - %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.06 - store float %mul, float* %arrayidx2, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %C, i32 %i.06 + store float %mul, ptr %arrayidx2, align 4 %inc = add nuw nsw i32 %i.06, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.end.loopexit, label %for.body @@ -242,7 +242,7 @@ for.end: ; preds = %for.end.loopexit, % ; Integer loops are always vectorizeable ; CHECK: Checking a loop in 'redi_fast' ; CHECK: We can vectorize this loop! 
-define i32 @redi_fast(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) { +define i32 @redi_fast(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -253,10 +253,10 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %Red.06 = phi i32 [ %add, %for.body ], [ undef, %for.body.preheader ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.07 - %0 = load i32, i32* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, i32* %b, i32 %i.07 - %1 = load i32, i32* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.07 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i32 %i.07 + %1 = load i32, ptr %arrayidx1, align 4 %mul = mul nsw i32 %1, %0 %add = add nsw i32 %mul, %Red.06 %inc = add nuw nsw i32 %i.07, 1 @@ -275,7 +275,7 @@ for.end: ; preds = %for.end.loopexit, % ; Floating-point loops can be vectorizeable with fast-math ; CHECK: Checking a loop in 'redf_fast' ; CHECK: We can vectorize this loop! -define float @redf_fast(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i32 %N) { +define float @redf_fast(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i32 %N) { entry: %cmp5 = icmp eq i32 %N, 0 br i1 %cmp5, label %for.end, label %for.body.preheader @@ -286,10 +286,10 @@ for.body.preheader: ; preds = %entry for.body: ; preds = %for.body.preheader, %for.body %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %Red.06 = phi float [ %add, %for.body ], [ undef, %for.body.preheader ] - %arrayidx = getelementptr inbounds float, float* %a, i32 %i.07 - %0 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %b, i32 %i.07 - %1 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %a, i32 %i.07 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %b, i32 %i.07 + %1 = load float, ptr %arrayidx1, align 4 %mul = fmul fast float %1, %0 %add = fadd fast float %mul, %Red.06 %inc = add nuw nsw i32 %i.07, 1 @@ -308,21 +308,21 @@ for.end: ; preds = %for.end.loopexit, % ; Make sure calls that turn into builtins are also covered ; CHECK: Checking a loop in 'fabs_fast' ; CHECK: We can vectorize this loop! 
-define void @fabs_fast(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) { +define void @fabs_fast(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) { entry: %cmp10 = icmp eq i32 %N, 0 br i1 %cmp10, label %for.end, label %for.body for.body: ; preds = %entry, %for.body %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, float* %A, i32 %i.011 - %0 = load float, float* %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.011 - %1 = load float, float* %arrayidx1, align 4 + %arrayidx = getelementptr inbounds float, ptr %A, i32 %i.011 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %B, i32 %i.011 + %1 = load float, ptr %arrayidx1, align 4 %fabsf = tail call fast float @fabsf(float %1) #2 %conv3 = fmul fast float %fabsf, %0 - %arrayidx4 = getelementptr inbounds float, float* %C, i32 %i.011 - store float %conv3, float* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %C, i32 %i.011 + store float %conv3, ptr %arrayidx4, align 4 %inc = add nuw nsw i32 %i.011, 1 %exitcond = icmp eq i32 %inc, %N br i1 %exitcond, label %for.end, label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll b/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll index b1c400a..9ad9347 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll @@ -13,15 +13,15 @@ target triple = "thumbv7-apple-ios3.0.0" ;SWIFT: load <4 x i32> ;SWIFT: load <4 x i32> ;SWIFT: ret -define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp { +define i32 @foo(ptr nocapture %A, i32 %n) nounwind readonly ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge .lr.ph: ; preds = %0, %.lr.ph %i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ] %sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ] - %2 = getelementptr inbounds i32, i32* %A, i32 %i.02 - %3 = load i32, i32* %2, align 4 + %2 = getelementptr inbounds i32, ptr %A, i32 %i.02 + %3 = load i32, ptr %2, align 4 %4 = add nsw i32 %3, %sum.01 %5 = add nsw i32 %i.02, 1 %exitcond = icmp eq i32 %5, %n @@ -36,7 +36,7 @@ define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp { ;SWIFTUNROLL-LABEL: @register_limit( ;SWIFTUNROLL: load i32 ;SWIFTUNROLL-NOT: load i32 -define i32 @register_limit(i32* nocapture %A, i32 %n) { +define i32 @register_limit(ptr nocapture %A, i32 %n) { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -48,8 +48,8 @@ define i32 @register_limit(i32* nocapture %A, i32 %n) { %sum.04 = phi i32 [ %8, %.lr.ph ], [ 0, %0 ] %sum.05 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] %sum.06 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ] - %2 = getelementptr inbounds i32, i32* %A, i32 %i.02 - %3 = load i32, i32* %2, align 4 + %2 = getelementptr inbounds i32, ptr %A, i32 %i.02 + %3 = load i32, ptr %2, align 4 %4 = add nsw i32 %3, %sum.01 %5 = add nsw i32 %i.02, 1 %6 = add nsw i32 %3, %sum.02 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll index 93f736e..5b0a7fa 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll @@ -33,32 +33,32 @@ for.body: %b.054 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add30, %for.body ] %add = add i32 %v.055, %offset %mul = mul i32 %add, 3 - %arrayidx = getelementptr 
inbounds [1536 x float], [1536 x float]* @src_data, i32 0, i32 %mul - %0 = load float, float* %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds [512 x float], [512 x float]* @kernel, i32 0, i32 %v.055 - %1 = load float, float* %arrayidx2, align 4 + %arrayidx = getelementptr inbounds [1536 x float], ptr @src_data, i32 0, i32 %mul + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds [512 x float], ptr @kernel, i32 0, i32 %v.055 + %1 = load float, ptr %arrayidx2, align 4 %mul3 = fmul fast float %0, %1 - %arrayidx4 = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i32 0, i32 %v.055 - %2 = load float, float* %arrayidx4, align 4 + %arrayidx4 = getelementptr inbounds [512 x float], ptr @kernel2, i32 0, i32 %v.055 + %2 = load float, ptr %arrayidx4, align 4 %mul5 = fmul fast float %mul3, %2 - %arrayidx6 = getelementptr inbounds [512 x float], [512 x float]* @kernel3, i32 0, i32 %v.055 - %3 = load float, float* %arrayidx6, align 4 + %arrayidx6 = getelementptr inbounds [512 x float], ptr @kernel3, i32 0, i32 %v.055 + %3 = load float, ptr %arrayidx6, align 4 %mul7 = fmul fast float %mul5, %3 - %arrayidx8 = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i32 0, i32 %v.055 - %4 = load float, float* %arrayidx8, align 4 + %arrayidx8 = getelementptr inbounds [512 x float], ptr @kernel4, i32 0, i32 %v.055 + %4 = load float, ptr %arrayidx8, align 4 %mul9 = fmul fast float %mul7, %4 %add10 = fadd fast float %r.057, %mul9 %arrayidx.sum = add i32 %mul, 1 - %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i32 0, i32 %arrayidx.sum - %5 = load float, float* %arrayidx11, align 4 + %arrayidx11 = getelementptr inbounds [1536 x float], ptr @src_data, i32 0, i32 %arrayidx.sum + %5 = load float, ptr %arrayidx11, align 4 %mul13 = fmul fast float %1, %5 %mul15 = fmul fast float %2, %mul13 %mul17 = fmul fast float %3, %mul15 %mul19 = fmul fast float %4, %mul17 %add20 = fadd fast float %g.056, %mul19 %arrayidx.sum52 = add i32 %mul, 2 - %arrayidx21 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i32 0, i32 %arrayidx.sum52 - %6 = load float, float* %arrayidx21, align 4 + %arrayidx21 = getelementptr inbounds [1536 x float], ptr @src_data, i32 0, i32 %arrayidx.sum52 + %6 = load float, ptr %arrayidx21, align 4 %mul23 = fmul fast float %1, %6 %mul25 = fmul fast float %2, %mul23 %mul27 = fmul fast float %3, %mul25 @@ -81,8 +81,8 @@ for.end: %r.0.lcssa = phi i8 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] %g.0.lcssa = phi i8 [ %phitmp60, %for.cond.for.end_crit_edge ], [ 0, %entry ] %b.0.lcssa = phi i8 [ %phitmp61, %for.cond.for.end_crit_edge ], [ 0, %entry ] - store i8 %r.0.lcssa, i8* @r_, align 4 - store i8 %g.0.lcssa, i8* @g_, align 4 - store i8 %b.0.lcssa, i8* @b_, align 4 + store i8 %r.0.lcssa, ptr @r_, align 4 + store i8 %g.0.lcssa, ptr @g_, align 4 + store i8 %b.0.lcssa, ptr @b_, align 4 ret void } diff --git a/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll index def0f5d..cb8899c 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll @@ -18,13 +18,13 @@ define void @example1() nounwind uwtable ssp { ;